| Web Searches |
@@ -828,6 +932,11 @@ Provider Support Matrix
* Azure inherits OpenAI’s override support because Azure OpenAI routes
through the OpenAI Chat Completions response format internally.
+
diff --git a/docs/record-replay/index.html b/docs/record-replay/index.html
index d35c3afa..062ab46c 100644
--- a/docs/record-replay/index.html
+++ b/docs/record-replay/index.html
@@ -429,6 +429,25 @@ Stream Collapsing
simple { content } or { toolCalls } fixture response.
+ Recording Block Order
+
+ When a recorded stream is genuinely tool-first or interleaved — a tool-call
+ delta arrives before the first content delta, or content arrives after a tool-call delta
+ — the collapser preserves that arrival order as a
+ blocks array on the fixture. This
+ works across OpenAI, Anthropic, Gemini, Ollama, Cohere, and Bedrock. Ordinary
+ text-then-tools streams are saved in the legacy { content, toolCalls } shape
+ with no blocks key, so existing recordings round-trip byte-identically.
+
+
+ Gemini Interactions is the exception: its record-side collapser
+ normalizes tool-call arguments only and does not reorder blocks on capture. Ordering is
+ still honored on replay from a hand-authored blocks fixture; it is simply not
+ reconstructed automatically from a recording. See the
+ per-provider observability matrix for how
+ faithfully block order is reconstructable on each provider's wire.
+
+
Header Forwarding
When proxying to upstream providers, aimock forwards the original request's headers except
diff --git a/fixtures/examples/llm/blocks-tool-first.json b/fixtures/examples/llm/blocks-tool-first.json
new file mode 100644
index 00000000..4ef6e8ef
--- /dev/null
+++ b/fixtures/examples/llm/blocks-tool-first.json
@@ -0,0 +1,13 @@
+{
+ "fixtures": [
+ {
+ "match": { "userMessage": "what's the weather in NYC?" },
+ "response": {
+ "blocks": [
+ { "type": "toolCall", "name": "get_weather", "arguments": "{\"city\": \"NYC\"}" },
+ { "type": "text", "text": "Let me check the weather in NYC for you." }
+ ]
+ }
+ }
+ ]
+}
diff --git a/packages/aimock-pytest/README.md b/packages/aimock-pytest/README.md
index 7b01b48a..64ced8b8 100644
--- a/packages/aimock-pytest/README.md
+++ b/packages/aimock-pytest/README.md
@@ -53,6 +53,12 @@ aimock.on_system_message("name=Atai", {"content": "..."}, user_message="who am I
# Array form: all substrings must appear in the joined system text (AND)
aimock.on_system_message(["name=Atai", "tz=PST"], {"content": "..."})
aimock.add_fixture(match={...}, response={...}, chunkSize=10, latency=50)
+# Ordered blocks: stream a tool call before text (tool-first / interleaved).
+# A blocks-only response is first-class — see /fixtures#ordered-blocks
+aimock.add_fixture(match={...}, response={"blocks": [
+ {"type": "toolCall", "name": "get_weather", "arguments": {"city": "SF"}},
+ {"type": "text", "text": "Here is the weather."},
+]})
aimock.load_fixtures("path/to/fixtures.json")
# Inspect
diff --git a/skills/write-fixtures/SKILL.md b/skills/write-fixtures/SKILL.md
index ffefa890..8c3de1a4 100644
--- a/skills/write-fixtures/SKILL.md
+++ b/skills/write-fixtures/SKILL.md
@@ -92,6 +92,24 @@ Multi-part content (e.g., `[{type: "text", text: "hello"}]`) is automatically ex
**Both object and string forms are accepted** for `arguments`. The fixture loader auto-stringifies objects via `JSON.stringify()`. Object form is preferred for readability.
+### Blocks (ordered text / tool-call streaming)
+
+The optional `blocks` array expresses an explicit, ordered sequence of stream entries — something plain `content` + `toolCalls` cannot, since those imply text-then-tools. Each entry is either `{ "type": "text", "text": "..." }` or `{ "type": "toolCall", "name": "...", "arguments": "...", "id"?: "..." }`, streamed in array order. This enables tool-first ordering (a tool call before any text) and interleaved text/tool ordering.
+
+```typescript
+// Tool-first: tool call streams before the text
+{
+ blocks: [
+ { type: "toolCall", name: "get_weather", arguments: { city: "SF" } },
+ { type: "text", text: "Checking the weather for you…" },
+ ],
+}
+```
+
+When `blocks` is present it takes **precedence over `content`/`toolCalls`** for stream order; when absent, legacy behavior is unchanged. Blocks-only fixtures are first-class — a response may be just `{ blocks: [...] }` with no `content` and no `toolCalls`, and builders derive the aggregate `content`/`tool_calls` from the blocks. A toolCall block's `arguments` may be a JSON object or a string (objects auto-stringify), exactly like top-level `toolCalls`.
+
+Replay caveat: block order is observable on some providers and not others — see the [per-provider observability matrix](../../docs/fixtures/index.html#ordered-blocks).
+
### Embedding
```typescript
@@ -364,7 +382,7 @@ mock.nextRequestError(429, { message: "Rate limited", type: "rate_limit_error" }
}
```
-**JSON auto-stringify**: In JSON fixture files, `arguments` and `content` can be objects — the loader auto-stringifies them with `JSON.stringify()`. The escaped-string form (`"{\"city\":\"SF\"}"`) still works but objects are preferred for readability.
+**JSON auto-stringify**: In JSON fixture files, `arguments` and `content` can be objects — the loader auto-stringifies them with `JSON.stringify()`. This also applies to a `blocks` entry's `arguments` — object form auto-stringifies just like top-level `toolCalls`. The escaped-string form (`"{\"city\":\"SF\"}"`) still works but objects are preferred for readability.
JSON files cannot use `RegExp` or `predicate` — those are code-only features. `streamingProfile` is supported in JSON fixture files.
diff --git a/src/__tests__/blocks-fixture-tolerance.test.ts b/src/__tests__/blocks-fixture-tolerance.test.ts
new file mode 100644
index 00000000..41648607
--- /dev/null
+++ b/src/__tests__/blocks-fixture-tolerance.test.ts
@@ -0,0 +1,157 @@
+/**
+ * #274 — Two F0-reachable validator/resolver defects on the `blocks` path.
+ *
+ * BUG A (programmatic crash): `addFixture`/`addFixtures`/`prependFixture` store
+ * a RAW fixture with no `normalizeResponse` pass, so a `toolCall` block written
+ * with an OBJECT `arguments` value reaches `resolveFixtureBlocks` unchanged.
+ * That resolver previously REQUIRED a string and threw, so real dispatch
+ * returned HTTP >= 500. The fix makes `resolveFixtureBlocks` tolerant: it
+ * stringifies an object `arguments` (mirroring `normalizeResponse`'s
+ * `JSON.stringify`) so the programmatic path is safe. The file-load path is
+ * unchanged because string `arguments` stay byte-identical.
+ *
+ * BUG C (spurious hard error): a fixture `{ content: "", blocks: [...] }` raised
+ * a "content is empty string" HARD error at validate even though the builder
+ * ignores `content` whenever a non-empty `blocks` array is present (`content`
+ * is a legacy mirror). The fix suppresses the empty-content error when
+ * non-empty `blocks` drive the output; fixtures WITHOUT blocks still error.
+ *
+ * NOTE: the empty-TEXT-block rule (`validateBlocks` rejecting `{type:"text",
+ * text:""}`) is intentionally strict and is NOT changed here.
+ */
+import { describe, it, expect, afterEach, beforeEach } from "vitest";
+import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { entryToFixture, validateFixtures } from "../fixture-loader.js";
+import { resolveFixtureBlocks } from "../helpers.js";
+import { LLMock } from "../llmock.js";
+
+let mock: LLMock | null = null;
+let tmpDir: string;
+
+beforeEach(() => {
+ tmpDir = mkdtempSync(join(tmpdir(), "blocks-tolerance-"));
+});
+
+afterEach(async () => {
+ if (mock) {
+ await mock.stop();
+ mock = null;
+ }
+ rmSync(tmpDir, { recursive: true, force: true });
+});
+
+describe("BUG A — toolCall block with OBJECT arguments on the programmatic path", () => {
+ it("resolveFixtureBlocks stringifies object arguments instead of throwing", () => {
+ const blocks = [{ type: "toolCall", name: "f", arguments: { a: 1 } }] as unknown as Parameters<
+ typeof resolveFixtureBlocks
+ >[0];
+ const resolved = resolveFixtureBlocks(blocks);
+ const arg = (resolved[0] as { arguments: unknown }).arguments;
+ expect(typeof arg).toBe("string");
+ expect(arg).toBe(JSON.stringify({ a: 1 }));
+ });
+
+ it("string arguments stay byte-identical (file-load path unchanged)", () => {
+ const blocks = [{ type: "toolCall", name: "f", arguments: '{"a":1}' }] as unknown as Parameters<
+ typeof resolveFixtureBlocks
+ >[0];
+ const resolved = resolveFixtureBlocks(blocks);
+ expect((resolved[0] as { arguments: unknown }).arguments).toBe('{"a":1}');
+ });
+
+ it("addFixture with object block arguments dispatches without HTTP >= 500", async () => {
+ mock = new LLMock({ port: 0 });
+ // addFixture stores RAW — no normalizeResponse. Object args reach the resolver.
+ mock.addFixture({
+ match: { userMessage: "hi" },
+ response: {
+ blocks: [{ type: "toolCall", name: "get_weather", arguments: { city: "NYC" } }],
+ },
+ } as never);
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/chat/completions`, {
+ method: "POST",
+ headers: { "content-type": "application/json" },
+ body: JSON.stringify({
+ model: "gpt-4o",
+ messages: [{ role: "user", content: "hi" }],
+ stream: true,
+ }),
+ });
+ const body = await res.text();
+ expect(res.status).toBeLessThan(500);
+ // The stringified arguments must reach the wire.
+ expect(body).toContain("NYC");
+ });
+});
+
+describe("BUG C — content:'' alongside a non-empty blocks array", () => {
+ it("validateFixtures does NOT raise 'content is empty string' when blocks drive output", () => {
+ const fixtures = [
+ entryToFixture({
+ match: { userMessage: "hi" },
+ response: { content: "", blocks: [{ type: "text", text: "hello" }] },
+ } as never),
+ ];
+ const errors = validateFixtures(fixtures).filter((i) => i.severity === "error");
+ expect(errors.some((e) => /content is empty string/.test(e.message))).toBe(false);
+ });
+
+ it("validateFixtures STILL raises 'content is empty string' WITHOUT blocks", () => {
+ const fixtures = [
+ entryToFixture({
+ match: { userMessage: "hi" },
+ response: { content: "" },
+ } as never),
+ ];
+ const errors = validateFixtures(fixtures).filter((i) => i.severity === "error");
+ expect(errors.some((e) => /content is empty string/.test(e.message))).toBe(true);
+ });
+
+ it("the empty-TEXT-block rule stays strict (unchanged)", () => {
+ const fixtures = [
+ entryToFixture({
+ match: { userMessage: "hi" },
+ response: { content: "", blocks: [{ type: "text", text: "" }] },
+ } as never),
+ ];
+ const errors = validateFixtures(fixtures).filter((i) => i.severity === "error");
+ expect(errors.some((e) => /text is empty string/.test(e.message))).toBe(true);
+ });
+
+ it("a content:'' + non-empty blocks fixture loads clean and streams", async () => {
+ const filePath = join(tmpDir, "content-empty-blocks.json");
+ writeFileSync(
+ filePath,
+ JSON.stringify({
+ fixtures: [
+ {
+ match: { userMessage: "stream please" },
+ response: { content: "", blocks: [{ type: "text", text: "Here you go." }] },
+ },
+ ],
+ }),
+ "utf-8",
+ );
+
+ mock = new LLMock({ port: 0 });
+ mock.loadFixtureFile(filePath);
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/chat/completions`, {
+ method: "POST",
+ headers: { "content-type": "application/json" },
+ body: JSON.stringify({
+ model: "gpt-4o",
+ messages: [{ role: "user", content: "stream please" }],
+ stream: true,
+ }),
+ });
+ const body = await res.text();
+ expect(res.status).toBeLessThan(500);
+ expect(body).toContain("Here you go.");
+ });
+});
diff --git a/src/__tests__/content-with-toolcalls.test.ts b/src/__tests__/content-with-toolcalls.test.ts
index a35bcc6a..52563ddb 100644
--- a/src/__tests__/content-with-toolcalls.test.ts
+++ b/src/__tests__/content-with-toolcalls.test.ts
@@ -52,8 +52,11 @@ describe("resolveFixtureBlocks", () => {
{ type: "toolCall", name: "get_time", arguments: "{}", id: "call_1" },
];
const result = resolveFixtureBlocks(blocks);
- // Same reference, same order — passthrough, not reconstruction.
- expect(result).toBe(blocks);
+ // Returns a defensive COPY (#274 F0): same contents and order, but NOT the
+ // caller's reference — builders must not be able to mutate, nor observe
+ // later mutations of, the stored fixture array.
+ expect(result).not.toBe(blocks);
+ expect(result).toEqual(blocks);
expect(result.map((b) => b.type)).toEqual(["toolCall", "text", "toolCall"]);
});
@@ -80,7 +83,7 @@ describe("resolveFixtureBlocks", () => {
it("rejects a toolCall block missing arguments", () => {
const blocks = [{ type: "toolCall", name: "f" }] as unknown as FixtureBlock[];
- expect(() => resolveFixtureBlocks(blocks)).toThrow(/index 0.*"name" and "arguments"/);
+ expect(() => resolveFixtureBlocks(blocks)).toThrow(/index 0.*string or object "arguments"/);
});
it("rejects a toolCall block with a non-string id", () => {
diff --git a/src/__tests__/example-fixtures-loadable.test.ts b/src/__tests__/example-fixtures-loadable.test.ts
new file mode 100644
index 00000000..5702d28e
--- /dev/null
+++ b/src/__tests__/example-fixtures-loadable.test.ts
@@ -0,0 +1,49 @@
+/**
+ * The shipped, copy-pasteable example fixtures under `fixtures/examples/llm/`
+ * must stay genuinely loadable — not merely well-formed JSON. The docs page
+ * (`docs/examples/index.html`) tells authors to copy these files verbatim, so a
+ * fixture that parses but fails recognition/validation would silently rot and
+ * mislead anyone who copies it.
+ *
+ * This loads the on-disk `blocks-tool-first.json` THROUGH THE REAL LOADER
+ * (`loadFixtureFile`, the same entry the mock server uses), then runs the real
+ * `validateFixtures` pass and asserts:
+ * - the loader returns the fixture (it swallows read/parse/shape errors into a
+ * warning + empty array, so a non-empty result already proves it loaded),
+ * - the blocks-only response is a recognized type (no hard validation errors),
+ * - the resolved blocks are tool-first (toolCall before text) — the exact
+ * ordering the docs example promises and that plain content/toolCalls cannot
+ * express.
+ */
+import { describe, it, expect } from "vitest";
+import { fileURLToPath } from "node:url";
+import { dirname, join } from "node:path";
+import { loadFixtureFile, validateFixtures } from "../fixture-loader.js";
+import { isContentWithToolCallsResponse, resolveFixtureBlocks } from "../helpers.js";
+import type { FixtureBlock } from "../types.js";
+
+const here = dirname(fileURLToPath(import.meta.url));
+const exampleDir = join(here, "..", "..", "fixtures", "examples", "llm");
+
+describe("shipped example fixtures load cleanly through the real loader", () => {
+ it("blocks-tool-first.json loads, validates, and is tool-first", () => {
+ const fixtures = loadFixtureFile(join(exampleDir, "blocks-tool-first.json"));
+
+ // The loader returns [] on read/parse/shape failure, so a non-empty result
+ // already proves the file genuinely loaded (not just parsed in isolation).
+ expect(fixtures).toHaveLength(1);
+
+ const response = fixtures[0].response;
+ expect(typeof response).not.toBe("function");
+ // Blocks-only fixtures are recognized via the BLOCKS-ONLY clause (#274 F0).
+ expect(isContentWithToolCallsResponse(response as object)).toBe(true);
+
+ // No hard validation errors — the file is genuinely valid, not just JSON.
+ const errors = validateFixtures(fixtures).filter((r) => r.severity === "error");
+ expect(errors).toEqual([]);
+
+ const blocks = resolveFixtureBlocks((response as { blocks: FixtureBlock[] }).blocks);
+ const types = blocks.map((b) => b.type);
+ expect(types.indexOf("toolCall")).toBeLessThan(types.indexOf("text"));
+ });
+});
diff --git a/src/__tests__/fixture-blocks-bedrock-converse.test.ts b/src/__tests__/fixture-blocks-bedrock-converse.test.ts
new file mode 100644
index 00000000..4249baca
--- /dev/null
+++ b/src/__tests__/fixture-blocks-bedrock-converse.test.ts
@@ -0,0 +1,340 @@
+import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
+import * as http from "node:http";
+import { crc32 } from "node:zlib";
+import type { Fixture } from "../types.js";
+import { createServer, type ServerInstance } from "../server.js";
+
+// ---------------------------------------------------------------------------
+// REPLAY block-ordering for the Bedrock Converse provider (#274 completion).
+//
+// Converse emits positional/ordered content: `output.message.content` is an
+// ARRAY (non-stream), and `contentBlockStart`/`contentBlockDelta` events carry
+// an explicit `contentBlockIndex` (stream). Both surfaces are order-observable,
+// so tool-first ordering (`toolUse` before `text`) IS wire-expressible — the
+// fixture's `blocks` array drives the emitted order.
+//
+// Mirrors the harness in bedrock-converse-toolonly-reasoning.test.ts.
+// ---------------------------------------------------------------------------
+
+let instance: ServerInstance | null = null;
+let baseUrl: string;
+
+function post(
+ path: string,
+ body: unknown,
+ headers?: Record<string, string>,
+): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: string }> {
+ return new Promise((resolve, reject) => {
+ const data = JSON.stringify(body);
+ const parsed = new URL(baseUrl);
+ const req = http.request(
+ {
+ hostname: parsed.hostname,
+ port: parsed.port,
+ path,
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ "Content-Length": Buffer.byteLength(data),
+ ...headers,
+ },
+ },
+ (res) => {
+ const chunks: Buffer[] = [];
+ res.on("data", (c: Buffer) => chunks.push(c));
+ res.on("end", () => {
+ resolve({
+ status: res.statusCode ?? 0,
+ headers: res.headers,
+ body: Buffer.concat(chunks).toString(),
+ });
+ });
+ },
+ );
+ req.on("error", reject);
+ req.write(data);
+ req.end();
+ });
+}
+
+function postRaw(
+ path: string,
+ body: unknown,
+ headers?: Record<string, string>,
+): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: Buffer }> {
+ return new Promise((resolve, reject) => {
+ const data = JSON.stringify(body);
+ const parsed = new URL(baseUrl);
+ const req = http.request(
+ {
+ hostname: parsed.hostname,
+ port: parsed.port,
+ path,
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ "Content-Length": Buffer.byteLength(data),
+ ...headers,
+ },
+ },
+ (res) => {
+ const chunks: Buffer[] = [];
+ res.on("data", (c: Buffer) => chunks.push(c));
+ res.on("end", () => {
+ resolve({
+ status: res.statusCode ?? 0,
+ headers: res.headers,
+ body: Buffer.concat(chunks),
+ });
+ });
+ },
+ );
+ req.on("error", reject);
+ req.write(data);
+ req.end();
+ });
+}
+
+/**
+ * Decode AWS Event Stream binary frames from a Buffer.
+ * Returns an array of { eventType, payload } objects in wire order.
+ */
+function decodeEventStreamFrames(buf: Buffer): Array<{ eventType: string; payload: object }> {
+ const frames: Array<{ eventType: string; payload: object }> = [];
+ let offset = 0;
+
+ while (offset < buf.length) {
+ if (offset + 12 > buf.length) break;
+
+ const totalLength = buf.readUInt32BE(offset);
+ const headersLength = buf.readUInt32BE(offset + 4);
+ const preludeCrc = buf.readUInt32BE(offset + 8);
+
+ const computedPreludeCrc = crc32(buf.subarray(offset, offset + 8));
+ if (computedPreludeCrc >>> 0 !== preludeCrc) {
+ throw new Error("Prelude CRC mismatch");
+ }
+
+ const headersStart = offset + 12;
+ const headersEnd = headersStart + headersLength;
+ const headers: Record<string, string> = {};
+ let hOff = headersStart;
+ while (hOff < headersEnd) {
+ const nameLen = buf.readUInt8(hOff);
+ hOff += 1;
+ const name = buf.subarray(hOff, hOff + nameLen).toString("utf8");
+ hOff += nameLen;
+ hOff += 1; // skip header type byte (7 = STRING)
+ const valueLen = buf.readUInt16BE(hOff);
+ hOff += 2;
+ const value = buf.subarray(hOff, hOff + valueLen).toString("utf8");
+ hOff += valueLen;
+ headers[name] = value;
+ }
+
+ const payloadStart = headersEnd;
+ const payloadEnd = offset + totalLength - 4; // minus message CRC
+ const payloadBuf = buf.subarray(payloadStart, payloadEnd);
+ const payload = payloadBuf.length > 0 ? JSON.parse(payloadBuf.toString("utf8")) : {};
+
+ frames.push({
+ eventType: headers[":event-type"] ?? "",
+ payload,
+ });
+
+ offset += totalLength;
+ }
+
+ return frames;
+}
+
+// Index of the first contentBlockStart frame that begins a `text` block.
+function firstTextStartIndex(frames: Array<{ eventType: string; payload: object }>): number {
+ return frames.findIndex(
+ (f) =>
+ f.eventType === "contentBlockStart" &&
+ (f.payload as { start?: { toolUse?: unknown } }).start?.toolUse === undefined,
+ );
+}
+
+// Index of the first contentBlockStart frame that begins a `toolUse` block.
+function firstToolUseStartIndex(frames: Array<{ eventType: string; payload: object }>): number {
+ return frames.findIndex(
+ (f) =>
+ f.eventType === "contentBlockStart" &&
+ (f.payload as { start?: { toolUse?: unknown } }).start?.toolUse !== undefined,
+ );
+}
+
+const MODEL = "anthropic.claude-3-5-sonnet-20241022-v2:0";
+
+// ---------------------------------------------------------------------------
+// Fixtures
+// ---------------------------------------------------------------------------
+
+// tool-first combined fixture: `blocks` orders toolCall BEFORE text. The
+// legacy {content, toolCalls} fields are also present so back-compat callers
+// (no blocks) would still match — but with blocks present, ordering wins.
+const toolFirstBlocksFixture: Fixture = {
+ match: { userMessage: "tool-first" },
+ response: {
+ content: "Here is the weather.",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"SF"}' }],
+ blocks: [
+ { type: "toolCall", name: "get_weather", arguments: '{"city":"SF"}' },
+ { type: "text", text: "Here is the weather." },
+ ],
+ },
+};
+
+// blocks-only fixture (post-F0): no content/toolCalls, ordered tool-first.
+const blocksOnlyFixture: Fixture = {
+ match: { userMessage: "blocks-only-fixture" },
+ response: {
+ blocks: [
+ { type: "toolCall", name: "lookup", arguments: '{"q":"x"}' },
+ { type: "text", text: "Done looking up." },
+ ],
+ },
+};
+
+// back-compat fixture: legacy combined shape, NO blocks. Must emit the
+// unchanged text-first legacy ordering.
+const legacyFixture: Fixture = {
+ match: { userMessage: "legacy-combined" },
+ response: {
+ content: "Legacy text first.",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NY"}' }],
+ },
+};
+
+const allFixtures: Fixture[] = [toolFirstBlocksFixture, blocksOnlyFixture, legacyFixture];
+
+beforeEach(async () => {
+ instance = await createServer(allFixtures, { port: 0, logLevel: "warn" });
+ baseUrl = instance.url;
+});
+
+afterEach(async () => {
+ vi.restoreAllMocks();
+ if (instance) {
+ await new Promise<void>((resolve) => {
+ instance!.server.close(() => resolve());
+ });
+ instance = null;
+ }
+});
+
+// ---------------------------------------------------------------------------
+// Non-streaming: content array ordering
+// ---------------------------------------------------------------------------
+
+describe("Bedrock Converse blocks ordering (non-streaming)", () => {
+ it("tool-first: toolUse precedes text in the content array", async () => {
+ const res = await post(`/model/${MODEL}/converse`, {
+ messages: [{ role: "user", content: [{ text: "tool-first" }] }],
+ });
+
+ expect(res.status).toBe(200);
+ const content = JSON.parse(res.body).output.message.content as Array<Record<string, unknown>>;
+ const toolIdx = content.findIndex((b) => "toolUse" in b);
+ const textIdx = content.findIndex((b) => "text" in b);
+ expect(toolIdx).toBeGreaterThanOrEqual(0);
+ expect(textIdx).toBeGreaterThanOrEqual(0);
+ // tool-first: toolUse block comes BEFORE the text block.
+ expect(toolIdx).toBeLessThan(textIdx);
+
+ const toolUse = content[toolIdx].toolUse as { name: string; input: unknown };
+ expect(toolUse.name).toBe("get_weather");
+ expect(toolUse.input).toEqual({ city: "SF" });
+ expect((content[textIdx] as { text: string }).text).toBe("Here is the weather.");
+ });
+
+ it("blocks-only fixture: toolUse precedes text in the content array", async () => {
+ const res = await post(`/model/${MODEL}/converse`, {
+ messages: [{ role: "user", content: [{ text: "blocks-only-fixture" }] }],
+ });
+
+ expect(res.status).toBe(200);
+ const content = JSON.parse(res.body).output.message.content as Array<Record<string, unknown>>;
+ const toolIdx = content.findIndex((b) => "toolUse" in b);
+ const textIdx = content.findIndex((b) => "text" in b);
+ expect(toolIdx).toBeGreaterThanOrEqual(0);
+ expect(textIdx).toBeGreaterThanOrEqual(0);
+ expect(toolIdx).toBeLessThan(textIdx);
+ expect((content[toolIdx].toolUse as { name: string }).name).toBe("lookup");
+ expect((content[textIdx] as { text: string }).text).toBe("Done looking up.");
+ });
+
+ it("back-compat: no-blocks fixture emits unchanged text-first legacy order", async () => {
+ const res = await post(`/model/${MODEL}/converse`, {
+ messages: [{ role: "user", content: [{ text: "legacy-combined" }] }],
+ });
+
+ expect(res.status).toBe(200);
+ const content = JSON.parse(res.body).output.message.content as Array<Record<string, unknown>>;
+ const toolIdx = content.findIndex((b) => "toolUse" in b);
+ const textIdx = content.findIndex((b) => "text" in b);
+ // legacy: text block leads, toolUse follows.
+ expect(textIdx).toBeLessThan(toolIdx);
+ expect((content[textIdx] as { text: string }).text).toBe("Legacy text first.");
+ expect((content[toolIdx].toolUse as { name: string }).name).toBe("get_weather");
+ });
+});
+
+// ---------------------------------------------------------------------------
+// Streaming: contentBlock event ordering
+// ---------------------------------------------------------------------------
+
+describe("Bedrock Converse blocks ordering (streaming)", () => {
+ it("tool-first: toolUse contentBlockStart precedes text contentBlockStart", async () => {
+ const res = await postRaw(`/model/${MODEL}/converse-stream`, {
+ messages: [{ role: "user", content: [{ text: "tool-first" }] }],
+ });
+
+ expect(res.status).toBe(200);
+ const frames = decodeEventStreamFrames(res.body);
+ const toolStart = firstToolUseStartIndex(frames);
+ const textStart = firstTextStartIndex(frames);
+ expect(toolStart).toBeGreaterThanOrEqual(0);
+ expect(textStart).toBeGreaterThanOrEqual(0);
+ // tool-first: the toolUse block opens BEFORE the text block.
+ expect(toolStart).toBeLessThan(textStart);
+
+ // contentBlockIndex follows encounter order: toolUse=0, text=1.
+ expect((frames[toolStart].payload as { contentBlockIndex: number }).contentBlockIndex).toBe(0);
+ expect((frames[textStart].payload as { contentBlockIndex: number }).contentBlockIndex).toBe(1);
+ });
+
+ it("blocks-only fixture streams tool-first", async () => {
+ const res = await postRaw(`/model/${MODEL}/converse-stream`, {
+ messages: [{ role: "user", content: [{ text: "blocks-only-fixture" }] }],
+ });
+
+ expect(res.status).toBe(200);
+ const frames = decodeEventStreamFrames(res.body);
+ const toolStart = firstToolUseStartIndex(frames);
+ const textStart = firstTextStartIndex(frames);
+ expect(toolStart).toBeGreaterThanOrEqual(0);
+ expect(textStart).toBeGreaterThanOrEqual(0);
+ expect(toolStart).toBeLessThan(textStart);
+ expect(
+ (frames[toolStart].payload as { start: { toolUse: { name: string } } }).start.toolUse.name,
+ ).toBe("lookup");
+ });
+
+ it("back-compat: no-blocks fixture streams unchanged text-first legacy order", async () => {
+ const res = await postRaw(`/model/${MODEL}/converse-stream`, {
+ messages: [{ role: "user", content: [{ text: "legacy-combined" }] }],
+ });
+
+ expect(res.status).toBe(200);
+ const frames = decodeEventStreamFrames(res.body);
+ const toolStart = firstToolUseStartIndex(frames);
+ const textStart = firstTextStartIndex(frames);
+ // legacy: text block opens first (index 0), toolUse follows (index 1).
+ expect(textStart).toBeLessThan(toolStart);
+ expect((frames[textStart].payload as { contentBlockIndex: number }).contentBlockIndex).toBe(0);
+ expect((frames[toolStart].payload as { contentBlockIndex: number }).contentBlockIndex).toBe(1);
+ });
+});
diff --git a/src/__tests__/fixture-blocks-bedrock.test.ts b/src/__tests__/fixture-blocks-bedrock.test.ts
new file mode 100644
index 00000000..ff2dce1b
--- /dev/null
+++ b/src/__tests__/fixture-blocks-bedrock.test.ts
@@ -0,0 +1,386 @@
+import { describe, it, expect, afterEach } from "vitest";
+import * as http from "node:http";
+import { crc32 } from "node:zlib";
+import type { Fixture } from "../types.js";
+import { createServer, type ServerInstance } from "../server.js";
+
+// ─── HTTP helpers (mirror bedrock-stream.test.ts) ───────────────────────────
+
+function post( // Test helper: sends `body` as a JSON POST to `url` and resolves with the status, headers, and body text.
+ url: string,
+ body: unknown,
+): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: string }> {
+ return new Promise((resolve, reject) => {
+ const data = JSON.stringify(body);
+ const parsed = new URL(url);
+ const req = http.request(
+ {
+ hostname: parsed.hostname,
+ port: parsed.port,
+ path: parsed.pathname, // only the pathname is forwarded; a query string on `url` would be dropped.
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ "Content-Length": Buffer.byteLength(data), // byte length rather than string length, so multi-byte characters are counted correctly.
+ },
+ },
+ (res) => {
+ const chunks: Buffer[] = [];
+ res.on("data", (c: Buffer) => chunks.push(c));
+ res.on("end", () => {
+ resolve({
+ status: res.statusCode ?? 0, // falls back to 0 when no status code is available.
+ headers: res.headers,
+ body: Buffer.concat(chunks).toString(), // decoded with Buffer's default encoding (UTF-8).
+ });
+ });
+ },
+ );
+ req.on("error", reject); // request-level failures reject the returned promise.
+ req.write(data);
+ req.end();
+ });
+}
+
+function postBinary( // Test helper: like a JSON POST, but resolves with the raw response bytes so binary frames can be decoded.
+ url: string,
+ body: unknown,
+): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: Buffer }> {
+ return new Promise((resolve, reject) => {
+ const data = JSON.stringify(body);
+ const parsed = new URL(url);
+ const req = http.request(
+ {
+ hostname: parsed.hostname,
+ port: parsed.port,
+ path: parsed.pathname, // only the pathname is forwarded; a query string on `url` would be dropped.
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ "Content-Length": Buffer.byteLength(data), // byte length rather than string length.
+ },
+ },
+ (res) => {
+ const chunks: Buffer[] = [];
+ res.on("data", (c: Buffer) => chunks.push(c));
+ res.on("end", () => {
+ resolve({
+ status: res.statusCode ?? 0, // falls back to 0 when no status code is available.
+ headers: res.headers,
+ body: Buffer.concat(chunks), // kept as a Buffer; no text decoding is applied.
+ });
+ });
+ },
+ );
+ req.on("error", reject); // request-level failures reject the returned promise.
+ req.write(data);
+ req.end();
+ });
+}
+
+interface ParsedFrame { // One decoded binary frame as produced by parseFrames.
+ eventType: string; // value of the ":event-type" header, or "" when the header is absent.
+ payload: unknown; // JSON-parsed payload, or null when the frame has no payload bytes.
+}
+
+function parseFrames(buf: Buffer): ParsedFrame[] { // Splits `buf` into consecutive length-prefixed frames and returns each frame's ":event-type" header and JSON payload.
+ const frames: ParsedFrame[] = [];
+ let offset = 0;
+
+ while (offset < buf.length) {
+ const totalLength = buf.readUInt32BE(offset); // first 4 bytes: total frame length. NOTE(review): a value of 0 would never advance `offset` below.
+ const frame = buf.subarray(offset, offset + totalLength);
+
+ const headersLength = frame.readUInt32BE(4); // next 4 bytes: length of the headers section.
+ const headersStart = 12; // headers begin after the two length fields plus 4 more bytes (presumably the prelude CRC — confirm; it is not read here).
+ const headersEnd = headersStart + headersLength;
+ const headers: Record = {};
+ let hOffset = headersStart;
+ while (hOffset < headersEnd) {
+ const nameLen = frame.readUInt8(hOffset); // 1-byte header-name length.
+ hOffset += 1;
+ const name = frame.subarray(hOffset, hOffset + nameLen).toString("utf8");
+ hOffset += nameLen;
+ hOffset += 1; // type byte (7 = STRING) — skipped without inspection, so every value is decoded as a string.
+ const valueLen = frame.readUInt16BE(hOffset); // 2-byte big-endian value length.
+ hOffset += 2;
+ const value = frame.subarray(hOffset, hOffset + valueLen).toString("utf8");
+ hOffset += valueLen;
+ headers[name] = value;
+ }
+
+ const payloadStart = headersEnd;
+ const payloadEnd = totalLength - 4; // the final 4 bytes of the frame are excluded from the payload (presumably the message CRC — confirm).
+ const payloadBuf = frame.subarray(payloadStart, payloadEnd);
+ let payload: unknown = null;
+ if (payloadBuf.length > 0) {
+ payload = JSON.parse(payloadBuf.toString("utf8")); // throws if a non-empty payload is not valid JSON.
+ }
+
+ // crc32 is imported to keep the parser shape identical to the sibling
+ // suite; checksum validation is exercised exhaustively in bedrock-stream.
+ void crc32; // references the import without calling it; no checksum is verified in this parser.
+
+ frames.push({
+ eventType: headers[":event-type"] ?? "", // "" when the frame has no ":event-type" header.
+ payload,
+ });
+
+ offset += totalLength;
+ }
+
+ return frames;
+}
+
+// ─── test lifecycle ─────────────────────────────────────────────────────────
+
+const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; // model id interpolated into the /model/{id}/... request paths used by the tests.
+
+let instance: ServerInstance | null = null; // server started by the current test; null when none is running.
+
+afterEach(async () => { // closes whatever server the test started so the next test begins without a live instance.
+ if (instance) {
+ await new Promise((resolve) => {
+ instance!.server.close(() => resolve()); // resolve only once the server's close callback fires.
+ });
+ instance = null;
+ }
+});
+
+// ─── tool-first ordering (combined fixture: content + toolCalls + blocks) ────
+
+describe("Bedrock invoke — ordered fixture blocks (tool-first)", () => {
+ const toolFirstFixture: Fixture = { // Combined fixture: legacy content + toolCalls plus an ordered `blocks` array that puts the tool call first.
+ match: { userMessage: "bedrock blocks tool-first" },
+ response: {
+ content: "Checking.", // differs from the text block below; the tests in this suite expect the blocks text ("Here you go.") on the wire.
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ blocks: [
+ { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' },
+ { type: "text", text: "Here you go." },
+ ],
+ },
+ };
+
+ it("non-streaming: emits tool_use content entry before text in content[]", async () => { // POSTs to /invoke and checks content[] follows the fixture's block order.
+ instance = await createServer([toolFirstFixture]);
+ const res = await post(`${instance.url}/model/${MODEL_ID}/invoke`, {
+ anthropic_version: "bedrock-2023-05-31",
+ max_tokens: 512,
+ messages: [{ role: "user", content: "bedrock blocks tool-first" }],
+ });
+
+ expect(res.status).toBe(200);
+ const body = JSON.parse(res.body) as {
+ content: Array<{ type: string; name?: string; text?: string; input?: unknown }>;
+ stop_reason: string;
+ };
+
+ expect(body.content).toHaveLength(2);
+ // tool_use FIRST (index 0), text SECOND (index 1).
+ expect(body.content[0].type).toBe("tool_use");
+ expect(body.content[0].name).toBe("get_weather");
+ expect(body.content[0].input).toEqual({ city: "NYC" }); // the fixture's JSON-string arguments are expected back as a parsed object.
+ expect(body.content[1].type).toBe("text");
+ expect(body.content[1].text).toBe("Here you go."); // text comes from the blocks entry, not the fixture's `content` ("Checking.").
+ expect(body.stop_reason).toBe("tool_use");
+ });
+
+ it("streaming: emits tool_use content_block events before text", async () => { // POSTs to /invoke-with-response-stream and checks the decoded frames follow the fixture's block order.
+ instance = await createServer([toolFirstFixture]);
+ const res = await postBinary(`${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`, {
+ anthropic_version: "bedrock-2023-05-31",
+ max_tokens: 512,
+ messages: [{ role: "user", content: "bedrock blocks tool-first" }],
+ });
+
+ expect(res.status).toBe(200);
+ const frames = parseFrames(res.body);
+
+ const starts = frames.filter(
+ (f) => (f.payload as { type?: string }).type === "content_block_start", // NOTE(review): parseFrames yields a null payload for empty frames; this property access would throw on one.
+ );
+ expect(starts).toHaveLength(2);
+
+ // First content block at index 0 is the tool_use; text follows at index 1.
+ const s0 = starts[0].payload as {
+ index: number;
+ content_block: { type: string; name?: string };
+ };
+ const s1 = starts[1].payload as { index: number; content_block: { type: string } };
+ expect(s0.index).toBe(0);
+ expect(s0.content_block.type).toBe("tool_use");
+ expect(s0.content_block.name).toBe("get_weather");
+ expect(s1.index).toBe(1);
+ expect(s1.content_block.type).toBe("text");
+
+ // tool_use start must precede the text start on the wire.
+ const toolStartIdx = frames.findIndex(
+ (f) =>
+ (f.payload as { type?: string }).type === "content_block_start" &&
+ (f.payload as { content_block?: { type: string } }).content_block?.type === "tool_use",
+ );
+ const textStartIdx = frames.findIndex(
+ (f) =>
+ (f.payload as { type?: string }).type === "content_block_start" &&
+ (f.payload as { content_block?: { type: string } }).content_block?.type === "text",
+ );
+ expect(toolStartIdx).toBeLessThan(textStartIdx); // both indices are known to exist because `starts` has exactly two entries of these types.
+
+ // tool input arrives via input_json_delta on index 0.
+ const toolDelta = frames.find(
+ (f) =>
+ (f.payload as { type?: string }).type === "content_block_delta" &&
+ (f.payload as { index?: number }).index === 0 &&
+ (f.payload as { delta?: { type?: string } }).delta?.type === "input_json_delta",
+ );
+ expect(toolDelta).toBeDefined();
+ expect(
+ JSON.parse((toolDelta!.payload as { delta: { partial_json: string } }).delta.partial_json), // only the first matching delta is parsed, so it must hold the complete JSON.
+ ).toEqual({ city: "NYC" });
+
+ // text arrives via text_delta on index 1.
+ const textDelta = frames.find(
+ (f) =>
+ (f.payload as { type?: string }).type === "content_block_delta" &&
+ (f.payload as { index?: number }).index === 1 &&
+ (f.payload as { delta?: { type?: string } }).delta?.type === "text_delta",
+ );
+ expect(textDelta).toBeDefined();
+ expect((textDelta!.payload as { delta: { text: string } }).delta.text).toBe("Here you go."); // only the first text_delta is checked, so the whole text must arrive in it.
+
+ // message envelope preserved, stop_reason tool_use.
+ expect((frames[0].payload as { type?: string }).type).toBe("message_start");
+ const msgDelta = frames.find((f) => (f.payload as { type?: string }).type === "message_delta");
+ expect((msgDelta!.payload as { delta: { stop_reason: string } }).delta.stop_reason).toBe(
+ "tool_use",
+ );
+ expect(
+ frames.find((f) => (f.payload as { type?: string }).type === "message_stop"),
+ ).toBeDefined();
+ });
+});
+
+// ─── blocks-only fixture (no content / no toolCalls) ─────────────────────────
+
+describe("Bedrock invoke — blocks-only fixture", () => {
+ const blocksOnlyFixture: Fixture = { // Fixture whose response has only `blocks` (no content / toolCalls), with the tool call first.
+ match: { userMessage: "bedrock blocks-only" },
+ response: {
+ blocks: [
+ { type: "toolCall", name: "lookup", arguments: '{"id":7}' },
+ { type: "text", text: "Done." },
+ ],
+ },
+ };
+
+ it("streaming: emits tool-first content_block events purely from blocks", async () => { // Streams the blocks-only fixture and checks tool_use opens at index 0, text at index 1.
+ instance = await createServer([blocksOnlyFixture]);
+ const res = await postBinary(`${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`, {
+ anthropic_version: "bedrock-2023-05-31",
+ max_tokens: 512,
+ messages: [{ role: "user", content: "bedrock blocks-only" }],
+ });
+
+ expect(res.status).toBe(200);
+ const frames = parseFrames(res.body);
+
+ const starts = frames.filter(
+ (f) => (f.payload as { type?: string }).type === "content_block_start",
+ );
+ expect(starts).toHaveLength(2); // one start per fixture block.
+
+ const s0 = starts[0].payload as {
+ index: number;
+ content_block: { type: string; name?: string };
+ };
+ expect(s0.index).toBe(0);
+ expect(s0.content_block.type).toBe("tool_use");
+ expect(s0.content_block.name).toBe("lookup");
+
+ const s1 = starts[1].payload as { index: number; content_block: { type: string } };
+ expect(s1.index).toBe(1);
+ expect(s1.content_block.type).toBe("text");
+
+ const textDelta = frames.find(
+ (f) =>
+ (f.payload as { type?: string }).type === "content_block_delta" &&
+ (f.payload as { index?: number }).index === 1 &&
+ (f.payload as { delta?: { type?: string } }).delta?.type === "text_delta",
+ );
+ expect((textDelta!.payload as { delta: { text: string } }).delta.text).toBe("Done."); // no toBeDefined guard here: a missing delta surfaces as a TypeError on `.payload`.
+ });
+
+ it("non-streaming: emits tool-first content[] purely from blocks", async () => { // POSTs to /invoke with the blocks-only fixture and checks content[] order.
+ instance = await createServer([blocksOnlyFixture]);
+ const res = await post(`${instance.url}/model/${MODEL_ID}/invoke`, {
+ anthropic_version: "bedrock-2023-05-31",
+ max_tokens: 512,
+ messages: [{ role: "user", content: "bedrock blocks-only" }],
+ });
+
+ expect(res.status).toBe(200);
+ const body = JSON.parse(res.body) as {
+ content: Array<{ type: string; name?: string; text?: string }>;
+ };
+ expect(body.content).toHaveLength(2); // one entry per fixture block.
+ expect(body.content[0].type).toBe("tool_use");
+ expect(body.content[0].name).toBe("lookup");
+ expect(body.content[1].type).toBe("text");
+ expect(body.content[1].text).toBe("Done.");
+ });
+});
+
+// ─── back-compat: no-blocks fixture → identical legacy text-first output ─────
+
+describe("Bedrock invoke — back-compat (no blocks)", () => {
+ const legacyFixture: Fixture = { // Legacy-shaped fixture: content + toolCalls with no `blocks` key.
+ match: { userMessage: "bedrock blocks legacy" },
+ response: {
+ content: "Checking.",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ },
+ };
+
+ it("non-streaming: legacy emits text content entry first, then tool_use", async () => { // Without `blocks`, content[] must keep the text-then-tool order.
+ instance = await createServer([legacyFixture]);
+ const res = await post(`${instance.url}/model/${MODEL_ID}/invoke`, {
+ anthropic_version: "bedrock-2023-05-31",
+ max_tokens: 512,
+ messages: [{ role: "user", content: "bedrock blocks legacy" }],
+ });
+
+ expect(res.status).toBe(200);
+ const body = JSON.parse(res.body) as {
+ content: Array<{ type: string; text?: string; name?: string }>;
+ };
+ expect(body.content).toHaveLength(2);
+ expect(body.content[0].type).toBe("text");
+ expect(body.content[0].text).toBe("Checking."); // text is the fixture's `content` value.
+ expect(body.content[1].type).toBe("tool_use");
+ expect(body.content[1].name).toBe("get_weather");
+ });
+
+ it("streaming: legacy emits text block at index 0, then tool_use at index 1", async () => { // Without `blocks`, the stream must keep the text-then-tool order.
+ instance = await createServer([legacyFixture]);
+ const res = await postBinary(`${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`, {
+ anthropic_version: "bedrock-2023-05-31",
+ max_tokens: 512,
+ messages: [{ role: "user", content: "bedrock blocks legacy" }],
+ });
+
+ expect(res.status).toBe(200);
+ const frames = parseFrames(res.body);
+ const starts = frames.filter(
+ (f) => (f.payload as { type?: string }).type === "content_block_start",
+ );
+ expect(starts).toHaveLength(2);
+
+ const s0 = starts[0].payload as { index: number; content_block: { type: string } };
+ const s1 = starts[1].payload as { index: number; content_block: { type: string } };
+ expect(s0.index).toBe(0); // `starts` preserves wire order, so s0 is the first block opened.
+ expect(s0.content_block.type).toBe("text");
+ expect(s1.index).toBe(1);
+ expect(s1.content_block.type).toBe("tool_use");
+ });
+});
diff --git a/src/__tests__/fixture-blocks-cohere.test.ts b/src/__tests__/fixture-blocks-cohere.test.ts
new file mode 100644
index 00000000..c0743a26
--- /dev/null
+++ b/src/__tests__/fixture-blocks-cohere.test.ts
@@ -0,0 +1,266 @@
+import { describe, it, expect, afterEach } from "vitest";
+import { LLMock } from "../llmock.js";
+
+interface CohereSSEEvent { // Loose shape of one parsed SSE `data:` payload, covering only the fields these tests read.
+ type: string; // event name, e.g. "tool-call-start", "content-delta", "message-end" as matched in the tests.
+ index?: number;
+ delta?: {
+ message?: {
+ content?: { type?: string; text?: string };
+ tool_calls?: { // typed as a single object (not an array), matching how the tests read it.
+ id?: string;
+ type?: string;
+ function?: { name?: string; arguments?: string };
+ };
+ tool_plan?: string;
+ };
+ finish_reason?: string;
+ usage?: unknown;
+ };
+ [key: string]: unknown; // any other top-level fields are allowed but untyped.
+}
+
+function parseCohereSSEEvents(body: string): CohereSSEEvent[] { // Parses an SSE response body into the JSON payloads of its `data:` lines, in order.
+ return body
+ .split("\n\n") // SSE events are separated by a blank line.
+ .filter((block) => block.trim().length > 0)
+ .map((block) => {
+ const dataLine = block.split("\n").find((l) => l.startsWith("data: ")); // only the first data line of each event is used.
+ if (!dataLine) return null; // events without a data line are dropped by the filter below.
+ return JSON.parse(dataLine.slice(6)) as CohereSSEEvent; // 6 === "data: ".length; throws on invalid JSON.
+ })
+ .filter(Boolean) as CohereSSEEvent[];
+}
+
+async function postCohereStream(mock: LLMock, userMessage: string): Promise { // POSTs a single-user-message streaming request to the mock's /v2/chat and returns the parsed SSE events.
+ const res = await fetch(`${mock.url}/v2/chat`, {
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ Authorization: "Bearer test-key",
+ },
+ body: JSON.stringify({
+ model: "command-r-plus",
+ messages: [{ role: "user", content: userMessage }],
+ stream: true,
+ }),
+ });
+ return parseCohereSSEEvents(await res.text()); // the HTTP status is not checked here.
+}
+
+interface CohereNonStreamResponse { // Subset of the non-streaming /v2/chat response body that the tests assert on.
+ message: {
+ content: { type: string; text: string }[];
+ tool_calls: { function: { name: string; arguments: string } }[]; // `arguments` is compared as a JSON string, not a parsed object.
+ };
+}
+
+async function postCohereNonStream( // POSTs a single-user-message non-streaming request to the mock's /v2/chat and returns the JSON body.
+ mock: LLMock,
+ userMessage: string,
+): Promise {
+ const res = await fetch(`${mock.url}/v2/chat`, {
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ Authorization: "Bearer test-key",
+ },
+ body: JSON.stringify({
+ model: "command-r-plus",
+ messages: [{ role: "user", content: userMessage }],
+ stream: false,
+ }),
+ });
+ return (await res.json()) as CohereNonStreamResponse; // unchecked cast: neither the status nor the body shape is validated.
+}
+
+describe("Cohere v2 Chat — ordered fixture blocks (tool-first)", () => {
+ let mock: LLMock | null = null; // mock server created by the current test; null when none is running.
+
+ afterEach(async () => { // stops the mock after every test so the next one starts clean.
+ if (mock) {
+ await mock.stop();
+ mock = null;
+ }
+ });
+
+ it("streams the tool call before the text for blocks [toolCall, text]", async () => { // Combined fixture with tool-first `blocks`: the stream must follow the block order.
+ mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "test cohere blocks tool-first" },
+ response: {
+ content: "Checking.", // differs from the text block; the assertion below expects the blocks text.
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ blocks: [
+ { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' },
+ { type: "text", text: "Here you go." },
+ ],
+ },
+ });
+ await mock.start();
+
+ const events = await postCohereStream(mock, "test cohere blocks tool-first");
+
+ // The tool-call-start must precede the text content-start on the wire.
+ const toolIdx = events.findIndex((e) => e.type === "tool-call-start");
+ const textIdx = events.findIndex((e) => e.type === "content-start");
+ expect(toolIdx).toBeGreaterThanOrEqual(0); // findIndex returns -1 when the event is missing.
+ expect(textIdx).toBeGreaterThanOrEqual(0);
+ expect(toolIdx).toBeLessThan(textIdx);
+
+ // The tool-call-start carries the function name.
+ const toolStart = events.find((e) => e.type === "tool-call-start");
+ expect(toolStart!.delta!.message!.tool_calls!.function!.name).toBe("get_weather");
+
+ // tool-call-delta args reassemble to the fixture arguments.
+ const args = events
+ .filter((e) => e.type === "tool-call-delta")
+ .map((e) => e.delta!.message!.tool_calls!.function!.arguments ?? "")
+ .join("");
+ expect(args).toBe('{"city":"NYC"}'); // compared as the exact string, so the fragments must concatenate byte-for-byte.
+
+ // The text arrives via content-delta.
+ const text = events
+ .filter((e) => e.type === "content-delta")
+ .map((e) => e.delta!.message!.content!.text ?? "")
+ .join("");
+ expect(text).toBe("Here you go.");
+
+ // Message envelope preserved with TOOL_CALL finish reason.
+ expect(events.find((e) => e.type === "message-start")).toBeDefined();
+ const messageEnd = events.find((e) => e.type === "message-end");
+ expect(messageEnd).toBeDefined();
+ expect(messageEnd!.delta!.finish_reason).toBe("TOOL_CALL");
+ });
+
+ it("blocks-only fixture (no content/toolCalls) streams tool-first purely from blocks", async () => { // Same ordering checks as the combined case, with `blocks` as the only response source.
+ mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "test cohere blocks-only" },
+ response: {
+ blocks: [
+ { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' },
+ { type: "text", text: "Here you go." },
+ ],
+ },
+ });
+ await mock.start();
+
+ const events = await postCohereStream(mock, "test cohere blocks-only");
+
+ const toolIdx = events.findIndex((e) => e.type === "tool-call-start");
+ const textIdx = events.findIndex((e) => e.type === "content-start");
+ expect(toolIdx).toBeGreaterThanOrEqual(0); // findIndex returns -1 when the event is missing.
+ expect(textIdx).toBeGreaterThanOrEqual(0);
+ expect(toolIdx).toBeLessThan(textIdx); // tool-first on the wire.
+
+ const toolStart = events.find((e) => e.type === "tool-call-start");
+ expect(toolStart!.delta!.message!.tool_calls!.function!.name).toBe("get_weather");
+
+ const args = events
+ .filter((e) => e.type === "tool-call-delta")
+ .map((e) => e.delta!.message!.tool_calls!.function!.arguments ?? "")
+ .join(""); // reassemble the streamed argument fragments.
+ expect(args).toBe('{"city":"NYC"}');
+
+ const text = events
+ .filter((e) => e.type === "content-delta")
+ .map((e) => e.delta!.message!.content!.text ?? "")
+ .join(""); // reassemble the streamed text fragments.
+ expect(text).toBe("Here you go.");
+ });
+
+ it("back-compat: a fixture without blocks emits the legacy text-first order", async () => { // content + toolCalls with no `blocks`: text must still start before the tool call.
+ mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "test cohere blocks legacy" },
+ response: {
+ content: "Hello there.",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ },
+ });
+ await mock.start();
+
+ const events = await postCohereStream(mock, "test cohere blocks legacy");
+
+ // Legacy: text content-start appears BEFORE the first tool-call-start.
+ const toolIdx = events.findIndex((e) => e.type === "tool-call-start");
+ const textIdx = events.findIndex((e) => e.type === "content-start");
+ expect(textIdx).toBeGreaterThanOrEqual(0); // findIndex returns -1 when the event is missing.
+ expect(toolIdx).toBeGreaterThanOrEqual(0);
+ expect(textIdx).toBeLessThan(toolIdx);
+
+ const text = events
+ .filter((e) => e.type === "content-delta")
+ .map((e) => e.delta!.message!.content!.text ?? "")
+ .join("");
+ expect(text).toBe("Hello there."); // text is the fixture's `content` value.
+
+ const toolStart = events.find((e) => e.type === "tool-call-start");
+ expect(toolStart!.delta!.message!.tool_calls!.function!.name).toBe("get_weather");
+
+ const messageEnd = events.find((e) => e.type === "message-end");
+ expect(messageEnd!.delta!.finish_reason).toBe("TOOL_CALL");
+ });
+});
+
+describe("Cohere v2 Chat — non-streaming fixture blocks", () => {
+ let mock: LLMock | null = null; // mock server created by the current test; null when none is running.
+
+ afterEach(async () => { // stops the mock after every test so the next one starts clean.
+ if (mock) {
+ await mock.stop();
+ mock = null;
+ }
+ });
+
+ it("tool-only blocks emit NO spurious empty text content entry", async () => { // A single toolCall block must yield one tool call and an empty content array.
+ mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "test cohere nonstream tool-only" },
+ response: {
+ blocks: [{ type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' }],
+ },
+ });
+ await mock.start();
+
+ const body = await postCohereNonStream(mock, "test cohere nonstream tool-only");
+
+ // The tool call is present.
+ expect(body.message.tool_calls).toHaveLength(1);
+ expect(body.message.tool_calls[0].function.name).toBe("get_weather");
+ expect(body.message.tool_calls[0].function.arguments).toBe('{"city":"NYC"}'); // arguments stay a JSON string in this response.
+
+ // No content entry at all — and specifically no empty `{ text: "" }`.
+ expect(body.message.content).toHaveLength(0);
+ expect(body.message.content.some((c) => c.type === "text" && c.text === "")).toBe(false); // redundant once the length check passes; kept as an explicit statement of the regression being guarded.
+ });
+
+ it("mixed blocks (text + tool) derive both content and tool calls correctly", async () => { // A blocks-only fixture with text then tool must populate both content and tool_calls.
+ mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "test cohere nonstream mixed" },
+ response: {
+ blocks: [
+ { type: "text", text: "Here you go." },
+ { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' },
+ ],
+ },
+ });
+ await mock.start();
+
+ const body = await postCohereNonStream(mock, "test cohere nonstream mixed");
+
+ // Text content derived from the text block.
+ const text = body.message.content
+ .filter((c) => c.type === "text")
+ .map((c) => c.text)
+ .join(""); // joined so the check holds whether the text arrives as one entry or several.
+ expect(text).toBe("Here you go.");
+
+ // Tool call derived from the tool block.
+ expect(body.message.tool_calls).toHaveLength(1);
+ expect(body.message.tool_calls[0].function.name).toBe("get_weather");
+ expect(body.message.tool_calls[0].function.arguments).toBe('{"city":"NYC"}');
+ });
+});
diff --git a/src/__tests__/fixture-blocks-gemini-interactions.test.ts b/src/__tests__/fixture-blocks-gemini-interactions.test.ts
new file mode 100644
index 00000000..ca39f37e
--- /dev/null
+++ b/src/__tests__/fixture-blocks-gemini-interactions.test.ts
@@ -0,0 +1,298 @@
+/**
+ * Gemini Interactions API (SDK 2.x step protocol): ordered `blocks` replay.
+ *
+ * The Interactions step protocol is index/step-addressed and ordered, so
+ * tool-first IS wire-expressible on REPLAY. When a combined
+ * content+toolCalls fixture (or a blocks-only fixture) sets the optional
+ * `blocks` field, the builder must emit `steps[]` (non-stream) and the
+ * streamed `step.*` brackets in the fixture's block ARRAY ORDER. A `toolCall`
+ * block placed before a `text` block therefore yields a `function_call` step
+ * AHEAD of the `model_output` step — the opposite of the legacy
+ * (text-step-always-first) shape.
+ *
+ * Real mock-server surface (mirrors gemini-interactions.test.ts): an actual
+ * server listens, a real HTTP request hits `/v1beta/interactions`, and
+ * assertions read the wire bytes for both `stream: false` (steps[]) and
+ * `stream: true` (step.start sequence).
+ */
+import { describe, it, expect, afterEach, beforeEach } from "vitest";
+import * as http from "node:http";
+import type { Fixture, FixtureBlock } from "../types.js";
+import { createServer, type ServerInstance } from "../server.js";
+import { resetInteractionCounter, resetEventIdCounter } from "../gemini-interactions.js";
+
+// --- helpers (mirror gemini-interactions.test.ts) ---
+
+function post( // Test helper: sends `body` as a JSON POST to `url` and resolves with the status, headers, and body text.
+ url: string,
+ body: unknown,
+): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: string }> {
+ return new Promise((resolve, reject) => {
+ const data = JSON.stringify(body);
+ const parsed = new URL(url);
+ const req = http.request(
+ {
+ hostname: parsed.hostname,
+ port: parsed.port,
+ path: parsed.pathname, // only the pathname is forwarded; a query string on `url` would be dropped.
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ "Content-Length": Buffer.byteLength(data), // byte length rather than string length.
+ },
+ },
+ (res) => {
+ const chunks: Buffer[] = [];
+ res.on("data", (c: Buffer) => chunks.push(c));
+ res.on("end", () => {
+ resolve({
+ status: res.statusCode ?? 0, // falls back to 0 when no status code is available.
+ headers: res.headers,
+ body: Buffer.concat(chunks).toString(), // decoded with Buffer's default encoding (UTF-8); used for both JSON and SSE bodies.
+ });
+ });
+ },
+ );
+ req.on("error", reject); // request-level failures reject the returned promise.
+ req.write(data);
+ req.end();
+ });
+}
+
+function parseInteractionsSSEEvents(body: string): Array> { // Collects the JSON payload of every `data: ` line in an SSE body, in order.
+ const events: Array> = [];
+ for (const line of body.split("\n")) {
+ if (line.startsWith("data: ")) { // all other lines (event names, blanks) are ignored.
+ events.push(JSON.parse(line.slice(6)) as Record); // 6 === "data: ".length; throws on invalid JSON.
+ }
+ }
+ return events;
+}
+
+// Returns the `step.type` of every step.start event (function_call | model_output), in wire order.
+function stepStartTypesInOrder(events: Array>): string[] {
+ return events
+ .filter((e) => e.event_type === "step.start")
+ .map((e) => (e.step as Record).type as string); // unchecked casts: assumes every step.start event has a `step` object with a string `type`.
+}
+
+// --- fixtures ---
+
+const toolFirstBlocks: FixtureBlock[] = [ // ordered blocks with the tool call ahead of the text.
+ { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}', id: "call_tf" },
+ { type: "text", text: "Here you go." },
+];
+
+const toolFirstFixture: Fixture = { // combined fixture: legacy content + toolCalls plus the tool-first `blocks`.
+ match: { userMessage: "gi blocks tool-first" },
+ response: {
+ content: "Here you go.", // same text as the text block, so the text assertions cannot tell which source was used.
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}', id: "call_tf" }],
+ blocks: toolFirstBlocks,
+ },
+};
+
+const blocksOnlyFixture: Fixture = { // response defined solely by `blocks`, tool call first.
+ match: { userMessage: "gi blocks-only tool-first" },
+ response: {
+ blocks: [
+ { type: "toolCall", name: "lookup", arguments: '{"q":"x"}', id: "call_bo" },
+ { type: "text", text: "Done." },
+ ],
+ },
+};
+
+const legacyContentWithToolsFixture: Fixture = { // legacy shape with no `blocks`; used by the back-compat tests.
+ match: { userMessage: "gi legacy combined" },
+ response: {
+ content: "Let me help you",
+ toolCalls: [{ name: "analyze_data", arguments: '{"dataset":"sales"}', id: "call_legacy" }],
+ },
+};
+
+const allFixtures: Fixture[] = [toolFirstFixture, blocksOnlyFixture, legacyContentWithToolsFixture]; // every test loads all three; the request's `input` selects one via `match.userMessage`.
+
+// --- tests ---
+
+let instance: ServerInstance | null = null; // server started by the current test; null when none is running.
+
+beforeEach(() => { // resets the module-level counters imported from gemini-interactions before each test.
+ resetInteractionCounter();
+ resetEventIdCounter();
+});
+
+afterEach(async () => { // closes whatever server the test started.
+ if (instance) {
+ await new Promise((resolve) => {
+ instance!.server.close(() => resolve()); // resolve only once the server's close callback fires.
+ });
+ instance = null;
+ }
+});
+
+describe("Gemini Interactions API — fixture block ordering (#274)", () => {
+ // ── tool-first: non-stream steps[] order ──────────────────────────────────
+ it("tool-first blocks: function_call step precedes model_output in non-stream steps[]", async () => { // stream:false request against the combined tool-first fixture.
+ instance = await createServer([...allFixtures]);
+ const res = await post(`${instance.url}/v1beta/interactions`, {
+ model: "gemini-2.5-flash",
+ input: "gi blocks tool-first",
+ stream: false,
+ });
+ expect(res.status).toBe(200);
+ const body = JSON.parse(res.body) as {
+ steps: Array>;
+ output_text?: string;
+ };
+ const types = body.steps.map((s) => s.type as string);
+ // Regression guard: the legacy behavior emitted model_output FIRST (index 0), function_call after.
+ expect(types.indexOf("function_call")).toBeLessThan(types.indexOf("model_output")); // on its own this would also pass if function_call were absent (indexOf -1); the next line closes that gap.
+ expect(types[0]).toBe("function_call");
+ // The function_call step carries name + parsed arguments.
+ expect(body.steps[0].name).toBe("get_weather");
+ expect(body.steps[0].arguments).toEqual({ city: "NYC" });
+ // Text still present.
+ expect(body.output_text).toBe("Here you go.");
+ const textStep = body.steps.find((s) => s.type === "model_output")!;
+ expect((textStep.content as Array<{ text: string }>)[0].text).toBe("Here you go.");
+ });
+
+ // ── tool-first: streamed step.start order ─────────────────────────────────
+ it("tool-first blocks: function_call step.start precedes model_output step.start when streaming", async () => { // stream:true request against the combined tool-first fixture.
+ instance = await createServer([...allFixtures]);
+ const res = await post(`${instance.url}/v1beta/interactions`, {
+ model: "gemini-2.5-flash",
+ input: "gi blocks tool-first",
+ stream: true,
+ });
+ expect(res.status).toBe(200);
+ const events = parseInteractionsSSEEvents(res.body);
+ const startTypes = stepStartTypesInOrder(events);
+ // Regression guard: the legacy behavior streamed model_output at index 0 first.
+ expect(startTypes.indexOf("function_call")).toBeLessThan(startTypes.indexOf("model_output"));
+ expect(startTypes[0]).toBe("function_call");
+
+ // The function_call step.start carries identity; args stream as arguments_delta.
+ const fcStart = events.find(
+ (e) =>
+ e.event_type === "step.start" &&
+ (e.step as Record).type === "function_call",
+ )!;
+ expect((fcStart.step as Record).name).toBe("get_weather");
+ const argDelta = events.find(
+ (e) =>
+ e.event_type === "step.delta" &&
+ (e.delta as Record).type === "arguments_delta",
+ )!;
+ expect(JSON.parse((argDelta.delta as Record).arguments as string)).toEqual({ // only the first arguments_delta is parsed, so it must hold the complete JSON.
+ city: "NYC",
+ });
+
+ // Text deltas still accumulate.
+ const textDeltas = events.filter(
+ (e) => e.event_type === "step.delta" && (e.delta as Record).type === "text",
+ );
+ const accumulated = textDeltas
+ .map((e) => (e.delta as Record).text as string)
+ .join("");
+ expect(accumulated).toBe("Here you go.");
+ });
+
+ // ── blocks-only: streams tool-first ───────────────────────────────────────
+ it("blocks-only fixture streams tool-first (function_call before model_output)", async () => { // stream:true request against the fixture that defines only `blocks`.
+ instance = await createServer([...allFixtures]);
+ const res = await post(`${instance.url}/v1beta/interactions`, {
+ model: "gemini-2.5-flash",
+ input: "gi blocks-only tool-first",
+ stream: true,
+ });
+ expect(res.status).toBe(200);
+ const events = parseInteractionsSSEEvents(res.body);
+ const startTypes = stepStartTypesInOrder(events);
+ expect(startTypes[0]).toBe("function_call");
+ expect(startTypes.indexOf("function_call")).toBeLessThan(startTypes.indexOf("model_output")); // with index 0 pinned above, this additionally requires a model_output step to exist.
+
+ const fcStart = events.find(
+ (e) =>
+ e.event_type === "step.start" &&
+ (e.step as Record).type === "function_call",
+ )!;
+ expect((fcStart.step as Record).name).toBe("lookup");
+
+ const textDeltas = events.filter(
+ (e) => e.event_type === "step.delta" && (e.delta as Record).type === "text",
+ );
+ expect(textDeltas.map((e) => (e.delta as Record).text).join("")).toBe("Done."); // concatenated text deltas must equal the text block.
+ });
+
+ it("blocks-only fixture returns tool-first non-stream steps[]", async () => { // stream:false request against the fixture that defines only `blocks`.
+ instance = await createServer([...allFixtures]);
+ const res = await post(`${instance.url}/v1beta/interactions`, {
+ model: "gemini-2.5-flash",
+ input: "gi blocks-only tool-first",
+ stream: false,
+ });
+ expect(res.status).toBe(200);
+ const body = JSON.parse(res.body) as {
+ steps: Array>;
+ output_text?: string;
+ };
+ const types = body.steps.map((s) => s.type as string);
+ expect(types[0]).toBe("function_call");
+ expect(types.indexOf("function_call")).toBeLessThan(types.indexOf("model_output")); // with index 0 pinned above, this additionally requires a model_output step to exist.
+ expect(body.steps[0].name).toBe("lookup");
+ expect(body.output_text).toBe("Done."); // output_text is expected to be derived from the text block.
+ });
+
+ // ── back-compat: no-blocks fixture is identical legacy (non-stream) ────────
+ it("back-compat: a no-blocks combined fixture keeps legacy text-first steps[]", async () => { // stream:false request against the legacy fixture; order must stay text-then-tool.
+ instance = await createServer([...allFixtures]);
+ const res = await post(`${instance.url}/v1beta/interactions`, {
+ model: "gemini-2.5-flash",
+ input: "gi legacy combined",
+ stream: false,
+ });
+ expect(res.status).toBe(200);
+ const body = JSON.parse(res.body) as {
+ steps: Array>;
+ output_text?: string;
+ };
+ expect(body.steps).toHaveLength(2); // exactly one text step and one tool step.
+ expect(body.steps[0].type).toBe("model_output");
+ expect((body.steps[0].content as Array<{ text: string }>)[0].text).toBe("Let me help you");
+ expect(body.steps[1].type).toBe("function_call");
+ expect(body.steps[1].name).toBe("analyze_data");
+ expect(body.output_text).toBe("Let me help you");
+ });
+
+ // ── back-compat: no-blocks fixture is identical legacy (streamed) ──────────
+ it("back-compat: a no-blocks combined fixture keeps legacy text-first stream", async () => { // stream:true request against the legacy fixture; order must stay text-then-tool.
+ instance = await createServer([...allFixtures]);
+ const res = await post(`${instance.url}/v1beta/interactions`, {
+ model: "gemini-2.5-flash",
+ input: "gi legacy combined",
+ stream: true,
+ });
+ expect(res.status).toBe(200);
+ const events = parseInteractionsSSEEvents(res.body);
+ const startTypes = stepStartTypesInOrder(events);
+ // Legacy: model_output (index 0) leads, function_call follows.
+ expect(startTypes[0]).toBe("model_output");
+ expect(startTypes.indexOf("model_output")).toBeLessThan(startTypes.indexOf("function_call")); // with index 0 pinned above, this additionally requires a function_call step to exist.
+
+ const textDeltas = events.filter(
+ (e) => e.event_type === "step.delta" && (e.delta as Record).type === "text",
+ );
+ expect(textDeltas.map((e) => (e.delta as Record).text).join("")).toBe(
+ "Let me help you",
+ );
+ const argDelta = events.find(
+ (e) =>
+ e.event_type === "step.delta" &&
+ (e.delta as Record).type === "arguments_delta",
+ )!;
+ expect(JSON.parse((argDelta.delta as Record).arguments as string)).toEqual({ // only the first arguments_delta is parsed, so it must hold the complete JSON.
+ dataset: "sales",
+ });
+ });
+});
diff --git a/src/__tests__/fixture-blocks-ollama.test.ts b/src/__tests__/fixture-blocks-ollama.test.ts
index c7a4e2fc..ddf526d2 100644
--- a/src/__tests__/fixture-blocks-ollama.test.ts
+++ b/src/__tests__/fixture-blocks-ollama.test.ts
@@ -34,6 +34,19 @@ async function ollamaChatStream(mock: LLMock, userMessage: string): Promise {
+ const res = await fetch(`${mock.url}/api/chat`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json" },
+ body: JSON.stringify({
+ model: "llama3.1",
+ messages: [{ role: "user", content: userMessage }],
+ stream: false,
+ }),
+ });
+ return (await res.json()) as OllamaChunk;
+}
+
describe("Ollama — fixture block ordering (tool-first)", () => {
let mock: LLMock | null = null;
@@ -134,4 +147,49 @@ describe("Ollama — fixture block ordering (tool-first)", () => {
expect(blockContentIdx).toBeLessThan(blockToolIdx);
expect(normalize(blockChunks)).toEqual(normalize(legacyChunks));
});
+
+ it("non-streaming: a blocks-only fixture backfills content and tool_calls from blocks", async () => {
+ // F0 regression (#274): the non-streaming /api/chat builder never received
+ // `response.blocks`, so a blocks-only fixture (no content/toolCalls) rendered
+ // as an empty turn (content:"", tool_calls:[]) — both payloads dropped.
+ mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "test ollama blocks-only nonstreaming" },
+ response: {
+ blocks: [
+ { type: "text", text: "hi" },
+ { type: "toolCall", name: "f", arguments: "{}" },
+ ],
+ },
+ });
+ await mock.start();
+
+ const body = await ollamaChatNonStreaming(mock, "test ollama blocks-only nonstreaming");
+
+ // Text block backfills content; toolCall block backfills tool_calls.
+ expect(body.message?.content).toBe("hi");
+ expect(body.message?.tool_calls?.length).toBe(1);
+ expect(body.message!.tool_calls![0].function.name).toBe("f");
+ expect(body.message!.tool_calls![0].function.arguments).toEqual({});
+ expect(body.done).toBe(true);
+ expect(body).toHaveProperty("total_duration");
+ });
+
+ it("non-streaming: a no-blocks legacy fixture is unchanged (content + tool_calls)", async () => {
+ mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "test ollama legacy nonstreaming" },
+ response: {
+ content: "Let me check.",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ },
+ });
+ await mock.start();
+
+ const body = await ollamaChatNonStreaming(mock, "test ollama legacy nonstreaming");
+
+ expect(body.message?.content).toBe("Let me check.");
+ expect(body.message!.tool_calls![0].function.name).toBe("get_weather");
+ expect(body.message!.tool_calls![0].function.arguments).toEqual({ city: "NYC" });
+ });
});
diff --git a/src/__tests__/fixture-blocks-only.test.ts b/src/__tests__/fixture-blocks-only.test.ts
new file mode 100644
index 00000000..c2a8dedb
--- /dev/null
+++ b/src/__tests__/fixture-blocks-only.test.ts
@@ -0,0 +1,178 @@
+/**
+ * #274 F0 — BLOCKS-ONLY fixtures are FIRST-CLASS.
+ *
+ * A fixture written as `{ blocks: [...] }` with NO `content` and NO `toolCalls`
+ * must be recognized, matched, and streamed in block order — exactly like a
+ * combined `{content, toolCalls, blocks}` fixture, but without the redundant
+ * legacy fields. Before F0, the recognizer required content+toolCalls, so a
+ * blocks-only fixture fell through every guard and the server answered 500
+ * ("no recognized response type" / no-match).
+ *
+ * This suite mirrors the `fixture-blocks-e2e.test.ts` harness: a REAL on-disk
+ * JSON fixture is loaded THROUGH THE REAL LOADER (`mock.loadFixtureFile(...)`),
+ * served by a live `LLMock` HTTP server, and the wire bytes are asserted. It
+ * covers the two providers whose wire format can FULLY express tool-first
+ * ordering: Anthropic typed content blocks and OpenAI Responses output_index
+ * sequencing. A blocks-only `[toolCall, text]` fixture must (a) NOT 500 and
+ * (b) stream the tool BEFORE the text.
+ *
+ * Back-compat for the existing content+toolCalls / text-only / tool-only shapes
+ * is covered by the rest of the suite (notably fixture-blocks-e2e.test.ts's
+ * legacy guard) and is intentionally not duplicated here.
+ */
+import { describe, it, expect, afterEach, beforeEach } from "vitest";
+import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { LLMock } from "../llmock.js";
+
+function parseSSEEvents(body: string): Array<{ type: string; [key: string]: unknown }> {
+ return body
+ .split("\n\n")
+ .filter((block) => block.trim().length > 0)
+ .map((block) => {
+ const dataLine = block.split("\n").find((l) => l.startsWith("data: "));
+ if (!dataLine) return null;
+ return JSON.parse(dataLine.slice(6)) as { type: string; [key: string]: unknown };
+ })
+ .filter(Boolean) as Array<{ type: string; [key: string]: unknown }>;
+}
+
+let tmpDir: string;
+let mock: LLMock | null = null;
+
+beforeEach(() => {
+ tmpDir = mkdtempSync(join(tmpdir(), "fixture-blocks-only-"));
+});
+
+afterEach(async () => {
+ if (mock) {
+ await mock.stop();
+ mock = null;
+ }
+ rmSync(tmpDir, { recursive: true, force: true });
+});
+
+function writeFixtureFile(name: string, content: unknown): string {
+ const filePath = join(tmpDir, name);
+ writeFileSync(filePath, JSON.stringify(content), "utf-8");
+ return filePath;
+}
+
+describe("#274 F0 e2e: blocks-only fixtures (no content/toolCalls) are first-class", () => {
+ it("Anthropic recognizes a blocks-only fixture and streams tool_use BEFORE text", async () => {
+ // BLOCKS-ONLY: no `content`, no `toolCalls` — only `blocks`.
+ const filePath = writeFixtureFile("anthropic-blocks-only.json", {
+ fixtures: [
+ {
+ match: { userMessage: "blocks-only anthropic tool-first" },
+ response: {
+ blocks: [
+ { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' },
+ { type: "text", text: "Here you go." },
+ ],
+ },
+ },
+ ],
+ });
+
+ mock = new LLMock({ port: 0 });
+ mock.loadFixtureFile(filePath);
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/messages`, {
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ "x-api-key": "test-key",
+ "anthropic-version": "2023-06-01",
+ },
+ body: JSON.stringify({
+ model: "claude-sonnet-4-6",
+ max_tokens: 1024,
+ messages: [{ role: "user", content: "blocks-only anthropic tool-first" }],
+ stream: true,
+ }),
+ });
+
+ // RED before F0: the blocks-only fixture is unrecognized → no match → 500.
+ expect(res.status).toBe(200);
+
+ const events = parseSSEEvents(await res.text());
+
+ const textBlockStart = events.find(
+ (e) =>
+ e.type === "content_block_start" && (e.content_block as { type: string })?.type === "text",
+ );
+ const toolBlockStart = events.find(
+ (e) =>
+ e.type === "content_block_start" &&
+ (e.content_block as { type: string })?.type === "tool_use",
+ );
+ expect(textBlockStart).toBeDefined();
+ expect(toolBlockStart).toBeDefined();
+
+ // Tool-first: the tool_use content block precedes the text content block.
+ const toolIdx = events.indexOf(toolBlockStart!);
+ const textIdx = events.indexOf(textBlockStart!);
+ expect(toolIdx).toBeLessThan(textIdx);
+
+ const messageDelta = events.find((e) => e.type === "message_delta");
+ expect((messageDelta!.delta as { stop_reason: string }).stop_reason).toBe("tool_use");
+ });
+
+ it("Responses recognizes a blocks-only fixture and assigns function_call output_index 0", async () => {
+ const filePath = writeFixtureFile("responses-blocks-only.json", {
+ fixtures: [
+ {
+ match: { userMessage: "blocks-only responses tool-first" },
+ response: {
+ blocks: [
+ { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' },
+ { type: "text", text: "Here you go." },
+ ],
+ },
+ },
+ ],
+ });
+
+ mock = new LLMock({ port: 0 });
+ mock.loadFixtureFile(filePath);
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/responses`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer test" },
+ body: JSON.stringify({
+ model: "gpt-4o",
+ input: [{ role: "user", content: "blocks-only responses tool-first" }],
+ stream: true,
+ }),
+ });
+
+ // RED before F0: blocks-only is unrecognized → no match → 500.
+ expect(res.status).toBe(200);
+
+ const events = parseSSEEvents(await res.text());
+
+ const fcAdded = events.find(
+ (e) =>
+ e.type === "response.output_item.added" &&
+ (e.item as { type: string })?.type === "function_call",
+ );
+ const msgAdded = events.find(
+ (e) =>
+ e.type === "response.output_item.added" && (e.item as { type: string })?.type === "message",
+ );
+ expect(fcAdded).toBeDefined();
+ expect(msgAdded).toBeDefined();
+ expect((fcAdded as unknown as { output_index: number }).output_index).toBe(0);
+ expect((msgAdded as unknown as { output_index: number }).output_index).toBe(1);
+
+ const completed = events.find((e) => e.type === "response.completed");
+ const output = (completed!.response as { output: Array<{ type: string }> }).output;
+ const types = output.map((o) => o.type);
+ expect(types[0]).toBe("function_call");
+ expect(types.indexOf("function_call")).toBeLessThan(types.indexOf("message"));
+ });
+});
diff --git a/src/__tests__/fixture-blocks-scoped-out.test.ts b/src/__tests__/fixture-blocks-scoped-out.test.ts
deleted file mode 100644
index a325b545..00000000
--- a/src/__tests__/fixture-blocks-scoped-out.test.ts
+++ /dev/null
@@ -1,143 +0,0 @@
-/**
- * #274 slot T3 — SCOPED-OUT consumer safety for ordered `blocks`.
- *
- * The `blocks` field is honored only by the five in-scope stream builders
- * (OpenAI chat, Anthropic, Gemini, Ollama, OpenAI Responses + the WS Responses
- * dispatch). The OTHER consumers of `isContentWithToolCallsResponse` —
- * Bedrock (`/model/{id}/invoke`), Cohere (`/v2/chat`), and Gemini Interactions
- * (`/v1beta/interactions`) — were deliberately left UNCHANGED: they read only
- * `.content` / `.toolCalls` and must completely IGNORE `.blocks`.
- *
- * These tests drive each scoped-out consumer with a fixture that ALSO carries a
- * `blocks` array (in an order that differs from the legacy text-first shape).
- * The consumer must NOT crash and must serve the legacy `{content, toolCalls}`
- * payload exactly as if `blocks` were absent.
- */
-import { describe, it, expect, afterEach } from "vitest";
-import * as http from "node:http";
-import type { Fixture, FixtureBlock } from "../types.js";
-import { createServer, type ServerInstance } from "../server.js";
-
-function post(
- url: string,
- body: unknown,
-): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: string }> {
- return new Promise((resolve, reject) => {
- const data = JSON.stringify(body);
- const parsed = new URL(url);
- const req = http.request(
- {
- hostname: parsed.hostname,
- port: parsed.port,
- path: parsed.pathname,
- method: "POST",
- headers: {
- "Content-Type": "application/json",
- "Content-Length": Buffer.byteLength(data),
- },
- },
- (res) => {
- const chunks: Buffer[] = [];
- res.on("data", (c: Buffer) => chunks.push(c));
- res.on("end", () => {
- resolve({
- status: res.statusCode ?? 0,
- headers: res.headers,
- body: Buffer.concat(chunks).toString(),
- });
- });
- },
- );
- req.on("error", reject);
- req.write(data);
- req.end();
- });
-}
-
-// A combined content+toolCalls fixture that ALSO carries a tool-first `blocks`
-// array — the exact shape the scoped-out consumers must ignore.
-const toolFirstBlocks: FixtureBlock[] = [
- { type: "toolCall", name: "get_weather", arguments: '{"city":"SF"}' },
- { type: "text", text: "Let me help you" },
-];
-
-const blocksBearingFixture: Fixture = {
- match: { userMessage: "scoped-out blocks" },
- response: {
- content: "Let me help you",
- toolCalls: [{ name: "get_weather", arguments: '{"city":"SF"}' }],
- blocks: toolFirstBlocks,
- },
-};
-
-let instance: ServerInstance | null = null;
-
-afterEach(async () => {
- if (instance) {
- await new Promise<void>((resolve) => {
- instance!.server.close(() => resolve());
- });
- instance = null;
- }
-});
-
-describe("#274 scoped-out consumers ignore `blocks` without crashing", () => {
- it("Bedrock /model/{id}/invoke serves legacy content+tool_use, ignoring blocks", async () => {
- instance = await createServer([blocksBearingFixture]);
- const res = await post(
- `${instance.url}/model/anthropic.claude-3-5-sonnet-20241022-v2:0/invoke`,
- {
- anthropic_version: "bedrock-2023-05-31",
- max_tokens: 512,
- messages: [{ role: "user", content: "scoped-out blocks" }],
- },
- );
-
- expect(res.status).toBe(200);
- const body = JSON.parse(res.body);
- expect(body.type).toBe("message");
- // Legacy text-first Anthropic shape: text content then tool_use — NOT the
- // tool-first ordering carried in `blocks` (which Bedrock must ignore).
- expect(body.content[0].type).toBe("text");
- expect(body.content[0].text).toBe("Let me help you");
- expect(body.content[1].type).toBe("tool_use");
- expect(body.content[1].name).toBe("get_weather");
- expect(body.content[1].input).toEqual({ city: "SF" });
- expect(body.stop_reason).toBe("tool_use");
- });
-
- it("Cohere /v2/chat serves legacy content+tool_calls, ignoring blocks", async () => {
- instance = await createServer([blocksBearingFixture]);
- const res = await post(`${instance.url}/v2/chat`, {
- model: "command-r-plus",
- messages: [{ role: "user", content: "scoped-out blocks" }],
- stream: false,
- });
-
- expect(res.status).toBe(200);
- const body = JSON.parse(res.body);
- // Cohere reads only content/toolCalls; blocks is ignored, no crash.
- expect(body.message.tool_calls).toHaveLength(1);
- expect(body.message.tool_calls[0].function.name).toBe("get_weather");
- expect(body.message.tool_calls[0].function.arguments).toBe('{"city":"SF"}');
- });
-
- it("Gemini Interactions /v1beta/interactions serves legacy steps, ignoring blocks", async () => {
- instance = await createServer([blocksBearingFixture]);
- const res = await post(`${instance.url}/v1beta/interactions`, {
- model: "gemini-2.5-flash",
- input: "scoped-out blocks",
- stream: false,
- });
-
- expect(res.status).toBe(200);
- const body = JSON.parse(res.body);
- // Reads only content/toolCalls; blocks is ignored, no crash.
- expect(body.status).toBe("requires_action");
- expect(body.output_text).toBe("Let me help you");
- expect(body.steps).toHaveLength(2);
- expect(body.steps[0].type).toBe("model_output");
- expect(body.steps[1].type).toBe("function_call");
- expect(body.steps[1].name).toBe("get_weather");
- });
-});
diff --git a/src/__tests__/fixture-blocks-ws-gemini-live.test.ts b/src/__tests__/fixture-blocks-ws-gemini-live.test.ts
new file mode 100644
index 00000000..2a59e69a
--- /dev/null
+++ b/src/__tests__/fixture-blocks-ws-gemini-live.test.ts
@@ -0,0 +1,244 @@
+import { describe, it, expect, afterEach } from "vitest";
+import { createServer, type ServerInstance } from "../server.js";
+import type { Fixture } from "../types.js";
+import { connectWebSocket } from "./ws-test-client.js";
+
+// --- fixtures ---
+
+// Blocks-only fixture (#274): tool-call BEFORE text, no top-level content/toolCalls.
+// Post-F0 this matches `isContentWithToolCallsResponse`. Before the WS fix, the
+// content+toolCalls branch read only `content ?? ""` / `toolCalls ?? []` and
+// IGNORED `response.blocks`, so this streamed an EMPTY payload (silent drop).
+const blocksOnlyFixture: Fixture = {
+ match: { userMessage: "blocks-only" },
+ response: {
+ blocks: [
+ { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' },
+ { type: "text", text: "Checking weather." },
+ ],
+ },
+};
+
+// Back-compat control: legacy combined { content, toolCalls } (no blocks).
+const legacyCombinedFixture: Fixture = {
+ match: { userMessage: "legacy-combined" },
+ response: {
+ content: "Working on it.",
+ toolCalls: [{ name: "do_thing", arguments: '{"x":1}' }],
+ },
+};
+
+// Empty-text-block leak fixture: an empty text block FIRST, then toolCalls,
+// with truncateAfterChunks:1. The empty-text guard used to emit a (useless)
+// empty wire message and `continue` WITHOUT spending a truncate tick, so the
+// toolCalls behind it leaked through — 3 messages emitted instead of the 1
+// truncation should have allowed.
+// NB: match keys are chosen so neither is a substring of the other (the
+// userMessage matcher matches by inclusion).
+const emptyTextThenToolTruncateFixture: Fixture = {
+ match: { userMessage: "blocks-emptytext-trunc" },
+ truncateAfterChunks: 1,
+ response: {
+ blocks: [
+ { type: "text", text: "" },
+ { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' },
+ // Second toolCall behind the allotted slot — it must always be truncated.
+ // Pre-fix the empty-text block spent NO truncate tick, so BOTH toolCalls
+ // (plus the empty text message) leaked; post-fix the first toolCall is the
+ // single allotted chunk and this one is truncated away.
+ { type: "toolCall", name: "leaked_tool", arguments: "{}" },
+ ],
+ },
+};
+
+// Control: identical shape but a NON-empty leading text block. Truncation at 1
+// must still fire after the first text chunk, so the toolCall never emits. This
+// guards that the fix does not alter non-empty-block truncation accounting.
+const nonEmptyTextThenToolTruncateFixture: Fixture = {
+ match: { userMessage: "blocks-filledtext-trunc" },
+ truncateAfterChunks: 1,
+ response: {
+ blocks: [
+ { type: "text", text: "Hi" },
+ { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' },
+ ],
+ },
+};
+
+const allFixtures: Fixture[] = [
+ blocksOnlyFixture,
+ legacyCombinedFixture,
+ emptyTextThenToolTruncateFixture,
+ nonEmptyTextThenToolTruncateFixture,
+];
+
+// --- helpers ---
+
+const GEMINI_WS_PATH =
+ "/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent";
+
+function setupMsg(model = "gemini-2.0-flash-exp"): string {
+ return JSON.stringify({ setup: { model } });
+}
+
+function clientContentMsg(text: string): string {
+ return JSON.stringify({
+ clientContent: {
+ turns: [{ role: "user", parts: [{ text }] }],
+ turnComplete: true,
+ },
+ });
+}
+
+// --- tests ---
+
+let instance: ServerInstance | null = null;
+
+afterEach(async () => {
+ if (instance) {
+ await new Promise<void>((resolve) => {
+ instance!.server.close(() => resolve());
+ });
+ instance = null;
+ }
+});
+
+describe("WebSocket Gemini Live — blocks-only fixtures (#274)", () => {
+ it("streams a NON-empty payload for a blocks-only fixture, tool-first then text", async () => {
+ instance = await createServer(allFixtures);
+ const ws = await connectWebSocket(instance.url, GEMINI_WS_PATH);
+
+ ws.send(setupMsg());
+ await ws.waitForMessages(1); // setupComplete
+
+ ws.send(clientContentMsg("blocks-only"));
+
+ // setupComplete + toolCall + text serverContent + turnComplete
+ const raw = await ws.waitForMessages(4);
+ const msgs = raw.slice(1).map((r) => JSON.parse(r));
+
+ // RED before fix: no toolCall and no text content (empty payload / silent drop).
+ const toolCallMsgs = msgs.filter((m) => m.toolCall);
+ const textMsgs = msgs.filter((m) => m.serverContent?.modelTurn?.parts?.[0]?.text);
+
+ // Tool call must be present and carry the block's name + args.
+ expect(toolCallMsgs).toHaveLength(1);
+ expect(toolCallMsgs[0].toolCall.functionCalls).toHaveLength(1);
+ expect(toolCallMsgs[0].toolCall.functionCalls[0].name).toBe("get_weather");
+ expect(toolCallMsgs[0].toolCall.functionCalls[0].args).toEqual({ city: "NYC" });
+
+ // Text must be present and reconstruct the block's text.
+ const fullText = textMsgs.map((m) => m.serverContent.modelTurn.parts[0].text).join("");
+ expect(fullText).toBe("Checking weather.");
+
+ // Ordering: Gemini Live WS expresses order via sequential messages — the
+ // toolCall message must arrive BEFORE the first text content message.
+ const toolIdx = msgs.findIndex((m) => m.toolCall);
+ const textIdx = msgs.findIndex((m) => m.serverContent?.modelTurn?.parts?.[0]?.text);
+ expect(toolIdx).toBeGreaterThanOrEqual(0);
+ expect(textIdx).toBeGreaterThanOrEqual(0);
+ expect(toolIdx).toBeLessThan(textIdx);
+
+ // Terminal turnComplete.
+ const last = msgs[msgs.length - 1];
+ expect(last.serverContent.turnComplete).toBe(true);
+
+ ws.close();
+ });
+
+ it("truncateAfterChunks:1 — empty leading text block must not leak the following toolCall", async () => {
+ instance = await createServer(allFixtures);
+ const ws = await connectWebSocket(instance.url, GEMINI_WS_PATH);
+
+ ws.send(setupMsg());
+ await ws.waitForMessages(1); // setupComplete
+
+ ws.send(clientContentMsg("blocks-emptytext-trunc"));
+
+ // Truncation closes the socket; wait for it, then inspect everything that
+ // arrived. Pre-fix the empty-text block emitted a useless empty message
+ // WITHOUT spending the single truncate tick, so BOTH toolCalls leaked
+ // behind it (3 post-setup messages). Post-fix the empty block produces
+ // nothing and the FIRST toolCall is the single allotted chunk.
+ await ws.waitForClose();
+ const msgs = ws
+ .getMessages()
+ .slice(1)
+ .map((r) => JSON.parse(r));
+
+ // Exactly one message survives truncation, and it is the first toolCall —
+ // the empty text emitted nothing and the second toolCall was truncated.
+ expect(msgs).toHaveLength(1);
+ const toolCallMsgs = msgs.filter((m) => m.toolCall);
+ expect(toolCallMsgs).toHaveLength(1);
+ expect(toolCallMsgs[0].toolCall.functionCalls[0].name).toBe("get_weather");
+
+ // The block behind the allotted slot must NOT leak.
+ const leaked = msgs.some((m) => m.toolCall?.functionCalls?.[0]?.name === "leaked_tool");
+ expect(leaked).toBe(false);
+
+ // No spurious empty-text message may precede the toolCall.
+ const emptyTextMsgs = msgs.filter((m) => m.serverContent?.modelTurn?.parts?.[0]?.text === "");
+ expect(emptyTextMsgs).toHaveLength(0);
+
+ ws.close();
+ });
+
+ it("truncateAfterChunks:1 — non-empty leading text block truncates the following toolCall (control)", async () => {
+ instance = await createServer(allFixtures);
+ const ws = await connectWebSocket(instance.url, GEMINI_WS_PATH);
+
+ ws.send(setupMsg());
+ await ws.waitForMessages(1); // setupComplete
+
+ ws.send(clientContentMsg("blocks-filledtext-trunc"));
+
+ await ws.waitForClose();
+ const msgs = ws
+ .getMessages()
+ .slice(1)
+ .map((r) => JSON.parse(r));
+
+ // The single text chunk is emitted, then truncation fires — toolCall gone.
+ const toolCallMsgs = msgs.filter((m) => m.toolCall);
+ expect(toolCallMsgs).toHaveLength(0);
+
+ const textMsgs = msgs.filter((m) => m.serverContent?.modelTurn?.parts?.[0]?.text);
+ const fullText = textMsgs.map((m) => m.serverContent.modelTurn.parts[0].text).join("");
+ expect(fullText).toBe("Hi");
+
+ ws.close();
+ });
+
+ it("back-compat: legacy { content, toolCalls } (no blocks) streams text-then-tool unchanged", async () => {
+ instance = await createServer(allFixtures);
+ const ws = await connectWebSocket(instance.url, GEMINI_WS_PATH);
+
+ ws.send(setupMsg());
+ await ws.waitForMessages(1);
+
+ ws.send(clientContentMsg("legacy-combined"));
+
+ // setupComplete + text content + toolCall + turnComplete
+ const raw = await ws.waitForMessages(4);
+ const msgs = raw.slice(1).map((r) => JSON.parse(r));
+
+ const textMsgs = msgs.filter((m) => m.serverContent?.modelTurn?.parts?.[0]?.text);
+ const fullText = textMsgs.map((m) => m.serverContent.modelTurn.parts[0].text).join("");
+ expect(fullText).toBe("Working on it.");
+
+ const toolCallMsgs = msgs.filter((m) => m.toolCall);
+ expect(toolCallMsgs).toHaveLength(1);
+ expect(toolCallMsgs[0].toolCall.functionCalls[0].name).toBe("do_thing");
+
+ // Legacy order: text content arrives before the tool call.
+ const textIdx = msgs.findIndex((m) => m.serverContent?.modelTurn?.parts?.[0]?.text);
+ const toolIdx = msgs.findIndex((m) => m.toolCall);
+ expect(textIdx).toBeLessThan(toolIdx);
+
+ const last = msgs[msgs.length - 1];
+ expect(last.serverContent.turnComplete).toBe(true);
+
+ ws.close();
+ });
+});
diff --git a/src/__tests__/fixture-blocks-ws-realtime.test.ts b/src/__tests__/fixture-blocks-ws-realtime.test.ts
new file mode 100644
index 00000000..7e79ce8f
--- /dev/null
+++ b/src/__tests__/fixture-blocks-ws-realtime.test.ts
@@ -0,0 +1,197 @@
+import { describe, it, expect, afterEach } from "vitest";
+import { createServer, type ServerInstance } from "../server.js";
+import type { Fixture } from "../types.js";
+import { connectWebSocket } from "./ws-test-client.js";
+
+// --- helpers (mirror ws-realtime.test.ts harness) ---
+
+interface WSEvent {
+ type: string;
+ event_id?: string;
+ [key: string]: unknown;
+}
+
+function parseEvents(raw: string[]): WSEvent[] {
+ return raw.map((m) => JSON.parse(m) as WSEvent);
+}
+
+function conversationItemCreate(role: string, text: string): string {
+ return JSON.stringify({
+ type: "conversation.item.create",
+ item: {
+ type: "message",
+ role,
+ content: [{ type: "input_text", text }],
+ },
+ });
+}
+
+function responseCreate(): string {
+ return JSON.stringify({ type: "response.create" });
+}
+
+let instance: ServerInstance | null = null;
+
+afterEach(async () => {
+ if (instance) {
+ await new Promise<void>((resolve) => {
+ instance!.server.close(() => resolve());
+ });
+ instance = null;
+ }
+});
+
+// ─── #274: blocks honored on the Realtime WS surface ────────────────────────
+
+describe("WebSocket /v1/realtime — fixture blocks", () => {
+ it("blocks-only fixture streams a NON-empty payload (tool-first order)", async () => {
+ // A blocks-only fixture (no `content`, no `toolCalls`) — post-F0 this matches
+ // isContentWithToolCallsResponse. BEFORE the fix, the WS content+toolCalls
+ // branch reads only `response.content ?? ""` / `response.toolCalls ?? []`,
+ // so it streams an empty text message and DROPS every block: a silent empty
+ // payload. AFTER the fix, the branch iterates `blocks` in array order.
+ const blocksOnlyFixture: Fixture = {
+ match: { userMessage: "blocks-only-rt" },
+ // Large chunkSize => each text/args body is a single delta, making the
+ // event count deterministic regardless of the default chunk size.
+ chunkSize: 100,
+ response: {
+ blocks: [
+ { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}', id: "call_1" },
+ { type: "text", text: "Here is the weather." },
+ ],
+ },
+ };
+ instance = await createServer([blocksOnlyFixture]);
+ const ws = await connectWebSocket(instance.url, "/v1/realtime");
+
+ await ws.waitForMessages(1); // session.created
+
+ ws.send(conversationItemCreate("user", "blocks-only-rt"));
+ await ws.waitForMessages(2); // + conversation.item.added
+
+ ws.send(responseCreate());
+
+ // With chunkSize 100 (single-chunk bodies):
+ // tool block: added + arg.delta + arg.done + item.done + conv.done = 5
+ // text block: added + content_part.added + text.delta + text.done
+ // + content_part.done + item.done + conv.done = 7
+ // + response.created (first) + response.done (last) = 14 response events.
+ // Total: 2 (session.created + conversation.item.added) + 14 = 16.
+ const allRaw = await ws.waitForMessages(16);
+ const responseEvents = parseEvents(allRaw.slice(2));
+ const types = responseEvents.map((e) => e.type);
+
+ // Payload is NON-empty: both a tool call and text were emitted.
+ expect(types[0]).toBe("response.created");
+ expect(types[types.length - 1]).toBe("response.done");
+ expect(types).toContain("response.function_call_arguments.done");
+ expect(types).toContain("response.output_text.done");
+
+ // The dropped-payload bug: tool-call arguments must survive.
+ const argDone = responseEvents.find((e) => e.type === "response.function_call_arguments.done");
+ expect(argDone).toBeDefined();
+ expect(argDone!.arguments).toBe('{"city":"NYC"}');
+
+ // The dropped-payload bug: text must survive.
+ const textDone = responseEvents.find((e) => e.type === "response.output_text.done");
+ expect(textDone).toBeDefined();
+ expect(textDone!.text).toBe("Here is the weather.");
+
+ // Order IS wire-expressible on Realtime (output items are sequenced on the
+ // wire with explicit output_index). tool-first means the function_call item
+ // is added before the text message item.
+ const added = responseEvents.filter((e) => e.type === "response.output_item.added");
+ expect(added.length).toBe(2);
+ expect((added[0].item as Record<string, unknown>).type).toBe("function_call");
+ expect((added[0].item as Record<string, unknown>).name).toBe("get_weather");
+ expect((added[1].item as Record<string, unknown>).type).toBe("message");
+ expect(added[0].output_index).toBe(0);
+ expect(added[1].output_index).toBe(1);
+
+ // response.done carries both output items.
+ const doneResp = responseEvents[responseEvents.length - 1].response as Record<string, unknown>;
+ expect((doneResp.output as unknown[]).length).toBe(2);
+
+ ws.close();
+ });
+
+ it("combined {content,toolCalls,blocks} honors block ordering (text-first)", async () => {
+ const combinedFixture: Fixture = {
+ match: { userMessage: "combined-blocks-rt" },
+ chunkSize: 100,
+ response: {
+ content: "Checking now.",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ blocks: [
+ { type: "text", text: "Checking now." },
+ { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}', id: "call_a" },
+ ],
+ },
+ };
+ instance = await createServer([combinedFixture]);
+ const ws = await connectWebSocket(instance.url, "/v1/realtime");
+
+ await ws.waitForMessages(1);
+ ws.send(conversationItemCreate("user", "combined-blocks-rt"));
+ await ws.waitForMessages(2);
+ ws.send(responseCreate());
+
+ // text block (7) + tool block (5) + created + done = 14; +2 prefix = 16.
+ const allRaw = await ws.waitForMessages(16);
+ const responseEvents = parseEvents(allRaw.slice(2));
+
+ const added = responseEvents.filter((e) => e.type === "response.output_item.added");
+ expect(added.length).toBe(2);
+ // blocks array order: text first, then tool.
+ expect((added[0].item as Record<string, unknown>).type).toBe("message");
+ expect((added[1].item as Record<string, unknown>).type).toBe("function_call");
+
+ const textDone = responseEvents.find((e) => e.type === "response.output_text.done");
+ expect(textDone!.text).toBe("Checking now.");
+ const argDone = responseEvents.find((e) => e.type === "response.function_call_arguments.done");
+ expect(argDone!.arguments).toBe('{"city":"NYC"}');
+
+ ws.close();
+ });
+
+ it("back-compat: a no-blocks {content,toolCalls} fixture is unchanged (text-first)", async () => {
+ const legacyFixture: Fixture = {
+ match: { userMessage: "legacy-ctc-rt" },
+ chunkSize: 100,
+ response: {
+ content: "Let me check the weather for you.",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ },
+ };
+ instance = await createServer([legacyFixture]);
+ const ws = await connectWebSocket(instance.url, "/v1/realtime");
+
+ await ws.waitForMessages(1);
+ ws.send(conversationItemCreate("user", "legacy-ctc-rt"));
+ await ws.waitForMessages(2);
+ ws.send(responseCreate());
+
+ // Legacy text item (7) + tool item (5) + created + done = 14; +2 prefix = 16.
+ const allRaw = await ws.waitForMessages(16);
+ const responseEvents = parseEvents(allRaw.slice(2));
+ const types = responseEvents.map((e) => e.type);
+
+ // Legacy: text item at index 0 (phase "commentary"), tool item at index 1.
+ const added = responseEvents.filter((e) => e.type === "response.output_item.added");
+ expect(added.length).toBe(2);
+ expect((added[0].item as Record<string, unknown>).type).toBe("message");
+ expect((added[0].item as Record<string, unknown>).phase).toBe("commentary");
+ expect((added[1].item as Record<string, unknown>).type).toBe("function_call");
+
+ const textDone = responseEvents.find((e) => e.type === "response.output_text.done");
+ expect(textDone!.text).toBe("Let me check the weather for you.");
+ const argDone = responseEvents.find((e) => e.type === "response.function_call_arguments.done");
+ expect(argDone!.arguments).toBe('{"city":"NYC"}');
+
+ expect(types[0]).toBe("response.created");
+ expect(types[types.length - 1]).toBe("response.done");
+
+ ws.close();
+ });
+});
diff --git a/src/__tests__/fixture-loader.test.ts b/src/__tests__/fixture-loader.test.ts
index c7701a22..ca4719c1 100644
--- a/src/__tests__/fixture-loader.test.ts
+++ b/src/__tests__/fixture-loader.test.ts
@@ -916,6 +916,92 @@ describe("validateFixtures", () => {
expect(results.filter((r) => r.severity === "error")).toEqual([]);
});
+ // P2 (#274): empty-text block rejection + blocks/content divergence warning.
+
+  it("error: text block with empty text string", () => {
+    // A single text block whose `text` is ""; `as never` bypasses the response typing.
+    const emptyTextResponse = { blocks: [{ type: "text", text: "" }] } as never;
+    const findings = validateFixtures([makeFixture({ response: emptyTextResponse })]);
+
+    // An error must name the offending block and say it is empty.
+    const emptyBlockErrors = findings.filter(
+      (f) =>
+        f.severity === "error" && f.message.includes("blocks[0]") && f.message.includes("empty"),
+    );
+    expect(emptyBlockErrors.length > 0).toBe(true);
+  });
+
+  it("warning: blocks diverge from content", () => {
+    // `content` and the lone text block carry different strings.
+    const mismatched = makeFixture({
+      response: {
+        content: "Original text.",
+        blocks: [{ type: "text", text: "Different text." }],
+      } as never,
+    });
+    const findings = validateFixtures([mismatched]);
+    const divergenceWarnings = findings.filter(
+      (f) => f.severity === "warning" && f.message.includes("diverge"),
+    );
+    expect(divergenceWarnings.length > 0).toBe(true);
+  });
+
+  it("warning: blocks diverge from toolCalls", () => {
+    // Text agrees, but the flat tool call is "search" while the block says "lookup".
+    const mismatched = makeFixture({
+      response: {
+        content: "Done.",
+        toolCalls: [{ name: "search", arguments: "{}" }],
+        blocks: [
+          { type: "toolCall", name: "lookup", arguments: "{}" },
+          { type: "text", text: "Done." },
+        ],
+      } as never,
+    });
+    const findings = validateFixtures([mismatched]);
+    const divergenceWarnings = findings.filter(
+      (f) => f.severity === "warning" && f.message.includes("diverge"),
+    );
+    expect(divergenceWarnings.length > 0).toBe(true);
+  });
+
+  it("no warning: blocks-only fixture (no content/toolCalls)", () => {
+    // Only `blocks` is supplied, so there is nothing for it to diverge from.
+    const blocksOnly = makeFixture({
+      response: {
+        blocks: [
+          { type: "toolCall", name: "search", arguments: '{"q":"x"}', id: "call_1" },
+          { type: "text", text: "Done." },
+        ],
+      } as never,
+    });
+    const findings = validateFixtures([blocksOnly]);
+    const mentionsDivergence = findings.some((f) => f.message.includes("diverge"));
+    const errors = findings.filter((f) => f.severity === "error");
+
+    expect(mentionsDivergence).toBe(false);
+    expect(errors).toEqual([]);
+  });
+
+  it("no warning: blocks matching content + toolCalls", () => {
+    // The blocks restate the same text and the same tool call (name + arguments).
+    const consistent = makeFixture({
+      response: {
+        content: "Done.",
+        toolCalls: [{ name: "search", arguments: '{"q":"x"}' }],
+        blocks: [
+          { type: "toolCall", name: "search", arguments: '{"q":"x"}', id: "call_1" },
+          { type: "text", text: "Done." },
+        ],
+      } as never,
+    });
+    const findings = validateFixtures([consistent]);
+    const mentionsDivergence = findings.some((f) => f.message.includes("diverge"));
+    expect(mentionsDivergence).toBe(false);
+  });
+
it("error: error response with empty message", () => {
const fixtures = [
makeFixture({ response: { error: { message: "", type: "e" }, status: 500 } }),
diff --git a/src/__tests__/gemini-interactions.test.ts b/src/__tests__/gemini-interactions.test.ts
index 608ca6c6..3aa38044 100644
--- a/src/__tests__/gemini-interactions.test.ts
+++ b/src/__tests__/gemini-interactions.test.ts
@@ -1480,9 +1480,12 @@ describe("collapseGeminiInteractionsSSE", () => {
'data: {"event_type":"step.stop","index":0,"event_id":"evt_3"}',
].join("\n\n");
const result = collapseGeminiInteractionsSSE(sse);
- // The (corrupt) call is still surfaced, but flagged so the recorder warns.
+ // The call is still surfaced and flagged so the recorder warns, but the
+ // unparseable assembly is NOT persisted — it falls back to valid "{}" so the
+ // fixture reloads cleanly (the loader JSON.parses arguments). See #274.
expect(result.toolCalls).toHaveLength(1);
- expect(result.toolCalls![0].arguments).toBe('{"city":"NY');
+ expect(result.toolCalls![0].arguments).toBe("{}");
+ expect(() => JSON.parse(result.toolCalls![0].arguments)).not.toThrow();
expect(result.droppedChunks).toBe(1);
expect(result.firstDroppedSample).toMatch(/not valid JSON/);
});
diff --git a/src/__tests__/stream-collapse.test.ts b/src/__tests__/stream-collapse.test.ts
index 4908cb84..d8789f19 100644
--- a/src/__tests__/stream-collapse.test.ts
+++ b/src/__tests__/stream-collapse.test.ts
@@ -6,10 +6,13 @@ import {
collapseOllamaNDJSON,
collapseCohereSSE,
collapseBedrockEventStream,
+ collapseGeminiInteractionsSSE,
collapseStreamingResponse,
} from "../stream-collapse.js";
import { encodeEventStreamMessage, encodeEventStreamFrame } from "../aws-event-stream.js";
import { parseHarmonyContent } from "../harmony.js";
+import { validateFixtures } from "../fixture-loader.js";
+import type { Fixture } from "../types.js";
// ---------------------------------------------------------------------------
// 1. OpenAI SSE
@@ -3928,4 +3931,389 @@ describe("stream block-order instrumentation (#274)", () => {
]);
});
});
+
+ // ---- Cohere SSE ----------------------------------------------------------
+  describe("collapseCohereSSE blocks + zero-arg", () => {
+    // Serialize one Cohere SSE frame: an `event:` line, a `data:` line whose JSON
+    // payload starts with `type`, and a trailing newline.
+    const frame = (type: string, fields: Record<string, unknown>): string =>
+      `event: ${type}\ndata: ${JSON.stringify({ type, ...fields })}\n`;
+
+    // Text fragment on content index 0.
+    const textDelta = (text: string) =>
+      frame("content-delta", {
+        index: 0,
+        delta: { message: { content: { type: "text", text } } },
+      });
+    // Opens a tool call (id + function name) at `index`.
+    const toolStart = (index: number, id: string, name: string) =>
+      frame("tool-call-start", {
+        index,
+        delta: { message: { tool_calls: { id, type: "function", function: { name } } } },
+      });
+    // Appends an arguments fragment to the tool call at `index`.
+    const toolArgDelta = (index: number, args: string) =>
+      frame("tool-call-delta", {
+        index,
+        delta: { message: { tool_calls: { function: { arguments: args } } } },
+      });
+
+    it("zero-arg tool call → flat arguments is '{}' not '' (F5)", () => {
+      // A start frame with no argument deltas at all.
+      const sse = toolStart(0, "call_1", "no_args");
+      const collapsed = collapseCohereSSE(sse);
+      expect(collapsed.toolCalls).toHaveLength(1);
+      expect(collapsed.toolCalls![0].arguments).toBe("{}");
+      expect(() => JSON.parse(collapsed.toolCalls![0].arguments)).not.toThrow();
+    });
+
+    it("text-first stream is NOT interleaved → no blocks (legacy byte-identical)", () => {
+      const frames = [
+        textDelta("Hello "),
+        textDelta("world"),
+        toolStart(0, "call_1", "get_weather"),
+        toolArgDelta(0, '{"city":"Paris"}'),
+      ];
+      const collapsed = collapseCohereSSE(frames.join("\n"));
+      expect(collapsed.content).toBe("Hello world");
+      expect(collapsed.toolCalls).toHaveLength(1);
+      expect(collapsed.blocks).toBeUndefined();
+    });
+
+    it("tool-first stream is interleaved → blocks in tool-first order", () => {
+      const frames = [
+        toolStart(0, "call_1", "get_weather"),
+        toolArgDelta(0, '{"city":"Paris"}'),
+        textDelta("Here you go"),
+      ];
+      const collapsed = collapseCohereSSE(frames.join("\n"));
+      expect(collapsed.blocks).toBeDefined();
+      expect(collapsed.blocks).toEqual([
+        { type: "toolCall", name: "get_weather", arguments: '{"city":"Paris"}', id: "call_1" },
+        { type: "text", text: "Here you go" },
+      ]);
+    });
+
+    it("text-after-tool interleave captured in stream order", () => {
+      const frames = [
+        textDelta("before"),
+        toolStart(0, "call_1", "fn"),
+        toolArgDelta(0, "{}"),
+        textDelta("after"),
+      ];
+      const collapsed = collapseCohereSSE(frames.join("\n"));
+      expect(collapsed.blocks).toEqual([
+        { type: "text", text: "before" },
+        { type: "toolCall", name: "fn", arguments: "{}", id: "call_1" },
+        { type: "text", text: "after" },
+      ]);
+    });
+
+    it("blocks ⟷ flat toolCalls consistent + record→reload round-trip valid (F4/F5)", () => {
+      const frames = [
+        toolStart(0, "call_1", "first"),
+        toolArgDelta(0, '{"a":1}'),
+        textDelta("middle"),
+        toolStart(1, "call_2", "second"),
+        toolArgDelta(1, '{"b":2}'),
+      ];
+      const collapsed = collapseCohereSSE(frames.join("\n"));
+      expect(collapsed.blocks).toBeDefined();
+
+      // The tool-call blocks, taken in order, must mirror the flat list entry by entry.
+      const toolBlocks = collapsed.blocks!.filter((b) => b.type === "toolCall");
+      expect(collapsed.toolCalls).toHaveLength(toolBlocks.length);
+      for (const [position, flat] of collapsed.toolCalls!.entries()) {
+        const ordered = toolBlocks[position] as { name: string; arguments: string; id?: string };
+        expect(ordered.name).toBe(flat.name);
+        expect(ordered.arguments).toBe(flat.arguments);
+        expect(ordered.id).toBe(flat.id);
+      }
+
+      // Every persisted arguments string (blocks AND flat) must be valid JSON so
+      // the fixture re-loads without a JSON.parse validation error.
+      const parsing = (raw: string) => () => JSON.parse(raw);
+      for (const b of collapsed.blocks!) {
+        if (b.type !== "toolCall") continue;
+        expect(parsing((b as { arguments: string }).arguments)).not.toThrow();
+      }
+      for (const flat of collapsed.toolCalls!) {
+        expect(parsing(flat.arguments)).not.toThrow();
+      }
+    });
+  });
+
+ // ---- Bedrock EventStream (binary) ---------------------------------------
+  describe("collapseBedrockEventStream blocks + zero-arg", () => {
+    // ---- Converse (camelCase) frames ----
+    // `contentBlockDelta` frame; the block index is written both at the top level
+    // and inside the nested `contentBlockDelta` object.
+    const converseDelta = (contentBlockIndex: number, delta: Record<string, unknown>) =>
+      encodeEventStreamMessage("contentBlockDelta", {
+        contentBlockIndex,
+        contentBlockDelta: { contentBlockIndex, delta },
+      });
+    const cText = (text: string) => converseDelta(0, { text });
+    const cToolArg = (index: number, input: string) => converseDelta(index, { toolUse: { input } });
+    const cToolStart = (index: number, id: string, name: string) =>
+      encodeEventStreamMessage("contentBlockStart", {
+        contentBlockIndex: index,
+        contentBlockStart: {
+          contentBlockIndex: index,
+          start: { toolUse: { toolUseId: id, name } },
+        },
+      });
+
+    // ---- Anthropic-native (flat `type`) frames, all sent as "chunk" events ----
+    const nativeChunk = (payload: Record<string, unknown>) =>
+      encodeEventStreamMessage("chunk", payload);
+    const nText = (text: string) =>
+      nativeChunk({ type: "content_block_delta", index: 0, delta: { type: "text_delta", text } });
+    const nToolStart = (index: number, id: string, name: string) =>
+      nativeChunk({
+        type: "content_block_start",
+        index,
+        content_block: { type: "tool_use", id, name },
+      });
+    const nToolArg = (index: number, partial: string) =>
+      nativeChunk({
+        type: "content_block_delta",
+        index,
+        delta: { type: "input_json_delta", partial_json: partial },
+      });
+
+    it("Converse: zero-arg tool call → flat arguments is '{}' not '' (F5)", () => {
+      const stream = Buffer.concat([cToolStart(0, "tool_1", "no_args")]);
+      const collapsed = collapseBedrockEventStream(stream);
+      expect(collapsed.toolCalls).toHaveLength(1);
+      expect(collapsed.toolCalls![0].arguments).toBe("{}");
+      expect(() => JSON.parse(collapsed.toolCalls![0].arguments)).not.toThrow();
+    });
+
+    it("Anthropic-native: zero-arg tool call → flat arguments is '{}' not '' (F5)", () => {
+      const stream = Buffer.concat([nToolStart(0, "toolu_1", "no_args")]);
+      const collapsed = collapseBedrockEventStream(stream);
+      expect(collapsed.toolCalls).toHaveLength(1);
+      expect(collapsed.toolCalls![0].arguments).toBe("{}");
+      expect(() => JSON.parse(collapsed.toolCalls![0].arguments)).not.toThrow();
+    });
+
+    it("Converse: tool-first stream → content preserved (no longer dropped) AND blocks ordered", () => {
+      const frames = [
+        cToolStart(0, "tool_1", "get_weather"),
+        cToolArg(0, '{"city":"Paris"}'),
+        cText("Here you go"),
+      ];
+      const collapsed = collapseBedrockEventStream(Buffer.concat(frames));
+      // Regression: a tool-bearing turn that ALSO streamed text used to drop content.
+      expect(collapsed.content).toBe("Here you go");
+      expect(collapsed.blocks).toEqual([
+        { type: "toolCall", name: "get_weather", arguments: '{"city":"Paris"}', id: "tool_1" },
+        { type: "text", text: "Here you go" },
+      ]);
+    });
+
+    it("Anthropic-native: text-after-tool interleave captured in stream order", () => {
+      const frames = [
+        nText("before"),
+        nToolStart(0, "toolu_1", "fn"),
+        nToolArg(0, "{}"),
+        nText("after"),
+      ];
+      const collapsed = collapseBedrockEventStream(Buffer.concat(frames));
+      expect(collapsed.content).toBe("beforeafter");
+      expect(collapsed.blocks).toEqual([
+        { type: "text", text: "before" },
+        { type: "toolCall", name: "fn", arguments: "{}", id: "toolu_1" },
+        { type: "text", text: "after" },
+      ]);
+    });
+
+    it("text-first stream → blocks undefined (legacy byte-identical)", () => {
+      const frames = [
+        cText("Hello"),
+        cToolStart(0, "tool_1", "get_weather"),
+        cToolArg(0, '{"city":"Paris"}'),
+      ];
+      const collapsed = collapseBedrockEventStream(Buffer.concat(frames));
+      expect(collapsed.content).toBe("Hello");
+      expect(collapsed.toolCalls).toHaveLength(1);
+      expect(collapsed.blocks).toBeUndefined();
+    });
+
+    it("blocks ⟷ flat toolCalls consistent + round-trip valid JSON (F4/F5)", () => {
+      const frames = [
+        cToolStart(0, "tool_1", "first"),
+        cToolArg(0, '{"a":1}'),
+        cText("middle"),
+        cToolStart(1, "tool_2", "second"),
+        cToolArg(1, '{"b":2}'),
+      ];
+      const collapsed = collapseBedrockEventStream(Buffer.concat(frames));
+      expect(collapsed.blocks).toBeDefined();
+
+      // The tool-call blocks, taken in order, must mirror the flat list entry by entry.
+      const toolBlocks = collapsed.blocks!.filter((b) => b.type === "toolCall");
+      expect(collapsed.toolCalls).toHaveLength(toolBlocks.length);
+      for (const [position, flat] of collapsed.toolCalls!.entries()) {
+        const ordered = toolBlocks[position] as { name: string; arguments: string; id?: string };
+        expect(ordered.name).toBe(flat.name);
+        expect(ordered.arguments).toBe(flat.arguments);
+        expect(ordered.id).toBe(flat.id);
+      }
+
+      // Both representations must hold parseable JSON arguments.
+      const parsing = (raw: string) => () => JSON.parse(raw);
+      for (const b of collapsed.blocks!) {
+        if (b.type !== "toolCall") continue;
+        expect(parsing((b as { arguments: string }).arguments)).not.toThrow();
+      }
+      for (const flat of collapsed.toolCalls!) {
+        expect(parsing(flat.arguments)).not.toThrow();
+      }
+    });
+  });
+
+ // ---- Gemini Interactions SSE (args-only; NO block-capture) ---------------
+ // DECISION: block-capture is intentionally NOT applied to this collapser. The
+ // 2.x protocol is step/index-addressed and the finalizer emits tool calls in
+ // SORTED step-index order, interleaved with arrival-pushed legacy 1.x calls —
+ // a hybrid that cannot be reconciled with arrival-order OrderAtoms by identity.
+ // Emitting blocks here would risk a blocks⟷flat disagreement (violating the
+ // #274 F4/F5 invariant), so we only normalize arguments and assert blocks stays
+ // undefined for an interleaved stream.
+  describe("collapseGeminiInteractionsSSE args-only", () => {
+    // One SSE `data:` line carrying the JSON-encoded event.
+    const data = (obj: Record<string, unknown>) => `data: ${JSON.stringify(obj)}`;
+
+    // 2.x `step.start` that opens a function_call step (index 0, id "s1").
+    const stepStart = (name: string) =>
+      data({
+        event_type: "step.start",
+        index: 0,
+        step: { type: "function_call", id: "s1", name },
+      });
+
+    // 2.x `step.delta` carrying one `arguments_delta` fragment for step 0.
+    const argsDelta = (fragment: string) =>
+      data({
+        event_type: "step.delta",
+        index: 0,
+        delta: { type: "arguments_delta", arguments: fragment },
+      });
+
+    // Wrap the collapsed tool calls in a fixture and return only the
+    // error-severity results validateFixtures reports for it.
+    const reloadErrors = (
+      toolCalls: NonNullable<ReturnType<typeof collapseGeminiInteractionsSSE>["toolCalls"]>,
+    ) => {
+      const fixture: Fixture = {
+        match: { userMessage: "weather", model: "gemini-2.0-flash" },
+        response: { toolCalls },
+      };
+      return validateFixtures([fixture]).filter((r) => r.severity === "error");
+    };
+
+    it("2.x zero-arg (whitespace argsStr) → arguments '{}' not '' or ' '", () => {
+      const body = [stepStart("fn"), argsDelta(" ")].join("\n\n");
+      const result = collapseGeminiInteractionsSSE(body);
+      expect(result.toolCalls).toHaveLength(1);
+      expect(result.toolCalls![0].arguments).toBe("{}");
+      expect(() => JSON.parse(result.toolCalls![0].arguments)).not.toThrow();
+    });
+
+    it("legacy 1.x zero-arg (arguments: '') → arguments '{}' not ''", () => {
+      const body = [
+        data({
+          event_type: "content.delta",
+          index: 0,
+          delta: { type: "function_call", name: "fn", arguments: "" },
+        }),
+      ].join("\n\n");
+      const result = collapseGeminiInteractionsSSE(body);
+      expect(result.toolCalls).toHaveLength(1);
+      expect(result.toolCalls![0].arguments).toBe("{}");
+      expect(() => JSON.parse(result.toolCalls![0].arguments)).not.toThrow();
+    });
+
+    it("valid non-empty args pass through unchanged (no regression)", () => {
+      const body = [stepStart("fn"), argsDelta('{"x":1}')].join("\n\n");
+      const result = collapseGeminiInteractionsSSE(body);
+      expect(result.toolCalls![0].arguments).toBe('{"x":1}');
+    });
+
+    it("2.x truncated/unparseable accumulated args → arguments '{}' (never persists invalid JSON)", () => {
+      // arguments_delta fragments accumulate to a truncated, INVALID-JSON
+      // string (`{"city":`). The finalizer must NOT persist that — it would
+      // fail validateFixtures on reload (the loader does JSON.parse(arguments)).
+      const body = [stepStart("get_weather"), argsDelta('{"city":')].join("\n\n");
+      const result = collapseGeminiInteractionsSSE(body);
+      expect(result.toolCalls).toHaveLength(1);
+      // RED before fix: persisted as the invalid '{"city":' string.
+      expect(result.toolCalls![0].arguments).toBe("{}");
+      expect(() => JSON.parse(result.toolCalls![0].arguments)).not.toThrow();
+      // The malformed assembly is still surfaced via droppedChunks accounting.
+      expect(result.droppedChunks).toBeGreaterThanOrEqual(1);
+
+      // The persisted fixture must reload cleanly: validateFixtures JSON.parses
+      // every toolCalls[].arguments and errors on unparseable JSON.
+      expect(reloadErrors(result.toolCalls!)).toEqual([]);
+    });
+
+    it("valid accumulated args reload cleanly through validateFixtures (no regression)", () => {
+      const body = [stepStart("get_weather"), argsDelta('{"city":"SF"}')].join("\n\n");
+      const result = collapseGeminiInteractionsSSE(body);
+      expect(result.toolCalls![0].arguments).toBe('{"city":"SF"}');
+      expect(result.droppedChunks).toBeUndefined();
+      expect(reloadErrors(result.toolCalls!)).toEqual([]);
+    });
+
+    it("interleaved 2.x stream yields NO blocks (deliberate no-block decision)", () => {
+      // Tool step first, then a text delta: an interleaved arrival order.
+      const body = [
+        stepStart("fn"),
+        argsDelta("{}"),
+        data({
+          event_type: "content.delta",
+          index: 0,
+          delta: { type: "text", text: "after tool" },
+        }),
+      ].join("\n\n");
+      const result = collapseGeminiInteractionsSSE(body);
+      expect(result.blocks).toBeUndefined();
+    });
+  });
});
diff --git a/src/__tests__/ws-test-client.ts b/src/__tests__/ws-test-client.ts
index 04b23dd7..f1d08a1e 100644
--- a/src/__tests__/ws-test-client.ts
+++ b/src/__tests__/ws-test-client.ts
@@ -17,6 +17,12 @@ export interface WSTestClient {
send(data: string): void;
close(): void;
waitForMessages(count: number, timeoutMs?: number): Promise;
+ /**
+ * Returns a snapshot of every text message received so far. Unlike
+ * `waitForMessages`, it never blocks — use it after `waitForClose()` to count
+ * exactly what the server emitted before truncating/closing the socket.
+ */
+ getMessages(): string[];
waitForClose(): Promise;
/**
* Resolves with the RFC 6455 close frame's status code and reason once the
@@ -147,6 +153,9 @@ export function connectWebSocket(
messageResolvers.push(check);
});
},
+    // Returns a shallow copy of `messages`, so callers never receive the array
+    // this client itself holds.
+    getMessages(): string[] {
+      return [...messages];
+    },
waitForClose(): Promise {
return new Promise((resolve) => {
if (socket.destroyed) {
diff --git a/src/bedrock-converse.ts b/src/bedrock-converse.ts
index 26b0e7c8..803bf568 100644
--- a/src/bedrock-converse.ts
+++ b/src/bedrock-converse.ts
@@ -12,6 +12,7 @@ import type {
ChatCompletionRequest,
ChatMessage,
Fixture,
+ FixtureBlock,
HandlerDefaults,
ResponseOverrides,
ToolCall,
@@ -23,6 +24,7 @@ import {
isTextResponse,
isToolCallResponse,
isContentWithToolCallsResponse,
+ resolveFixtureBlocks,
isErrorResponse,
flattenHeaders,
getContext,
@@ -181,7 +183,101 @@ function buildBedrockStreamContentWithToolCallsEvents(
logger: Logger,
reasoning?: string,
overrides?: ResponseOverrides,
+ blocks?: FixtureBlock[],
): Array<{ eventType: string; payload: object }> {
+ if (blocks && blocks.length > 0) {
+ // NEW PATH: stream `text`/`toolUse` content blocks in the fixture's array
+ // order. Converse's indexed contentBlock events make ordering observable —
+ // a `toolCall` block can take a lower `contentBlockIndex` than a `text`
+ // block. Indices are assigned in encounter order, continuing from any
+ // leading reasoning block (which occupies index 0).
+ const events: Array<{ eventType: string; payload: object }> = [
+ { eventType: "messageStart", payload: { role: "assistant" } },
+ ];
+
+ let blockIndex = 0;
+ if (reasoning) {
+ events.push({
+ eventType: "contentBlockStart",
+ payload: { contentBlockIndex: blockIndex, start: { reasoningContent: {} } },
+ });
+ for (let i = 0; i < reasoning.length; i += chunkSize) {
+ events.push({
+ eventType: "contentBlockDelta",
+ payload: {
+ contentBlockIndex: blockIndex,
+ delta: { reasoningContent: { text: reasoning.slice(i, i + chunkSize) } },
+ },
+ });
+ }
+ events.push({
+ eventType: "contentBlockStop",
+ payload: { contentBlockIndex: blockIndex },
+ });
+ blockIndex++;
+ }
+
+ const ordered = resolveFixtureBlocks(blocks);
+ for (const block of ordered) {
+ if (block.type === "text") {
+ events.push({
+ eventType: "contentBlockStart",
+ payload: { contentBlockIndex: blockIndex, start: {} },
+ });
+ for (let i = 0; i < block.text.length; i += chunkSize) {
+ events.push({
+ eventType: "contentBlockDelta",
+ payload: {
+ contentBlockIndex: blockIndex,
+ delta: { text: block.text.slice(i, i + chunkSize) },
+ },
+ });
+ }
+ events.push({
+ eventType: "contentBlockStop",
+ payload: { contentBlockIndex: blockIndex },
+ });
+ } else {
+ const toolUseId = block.id || generateToolUseId();
+ events.push({
+ eventType: "contentBlockStart",
+ payload: {
+ contentBlockIndex: blockIndex,
+ start: { toolUse: { toolUseId, name: block.name } },
+ },
+ });
+ const argsStr = parseConverseToolArgumentsForStream(
+ { name: block.name, arguments: block.arguments } as ToolCall,
+ logger,
+ );
+ for (let i = 0; i < argsStr.length; i += chunkSize) {
+ events.push({
+ eventType: "contentBlockDelta",
+ payload: {
+ contentBlockIndex: blockIndex,
+ delta: { toolUse: { input: argsStr.slice(i, i + chunkSize) } },
+ },
+ });
+ }
+ events.push({
+ eventType: "contentBlockStop",
+ payload: { contentBlockIndex: blockIndex },
+ });
+ }
+ blockIndex++;
+ }
+
+ events.push({
+ eventType: "messageStop",
+ payload: { stopReason: converseStopReason(overrides?.finishReason, "tool_use") },
+ });
+ events.push({
+ eventType: "metadata",
+ payload: { usage: converseUsage(overrides), metrics: { latencyMs: 0 } },
+ });
+ return events;
+ }
+
const events = buildBedrockStreamTextEvents(content, chunkSize, reasoning, overrides);
// Remove trailing metadata + messageStop events — we re-emit them after tool blocks
for (let i = events.length - 1; i >= 0; i--) {
@@ -495,6 +591,7 @@ function buildConverseContentWithToolCallsResponse(
logger: Logger,
reasoning?: string,
overrides?: ResponseOverrides,
+ blocks?: FixtureBlock[],
): object {
const contentBlocks: object[] = [];
if (reasoning) {
@@ -502,8 +599,11 @@ function buildConverseContentWithToolCallsResponse(
reasoningContent: { reasoningText: { text: reasoning } },
});
}
- contentBlocks.push({ text: content });
- for (const tc of toolCalls) {
+
+ // Build a Converse `toolUse` content block from a fixture tool call, parsing
+ // its string `arguments` into the object `input` Converse emits (warning on
+ // malformed JSON — same idiom as the legacy/streaming paths).
+ const toolUseBlock = (tc: { name: string; arguments: string; id?: string }): object => {
let argsObj: unknown;
try {
argsObj = JSON.parse(tc.arguments || "{}");
@@ -513,13 +613,38 @@ function buildConverseContentWithToolCallsResponse(
);
argsObj = {};
}
- contentBlocks.push({
+ return {
toolUse: {
toolUseId: tc.id || generateToolUseId(),
name: tc.name,
input: argsObj,
},
- });
+ };
+ };
+
+ if (blocks && blocks.length > 0) {
+ // NEW PATH: the non-streaming `content[]` array is positionally observable,
+ // so emit `text`/`toolUse` content blocks in the fixture's ARRAY ORDER
+ // (after any leading reasoning block). A toolCall block before a text block
+ // therefore yields a toolUse ahead of the text — matching the streaming
+ // path for the same `blocks` fixture.
+ const ordered = resolveFixtureBlocks(blocks);
+ for (const block of ordered) {
+ if (block.type === "text") {
+ contentBlocks.push({ text: block.text });
+ } else {
+ contentBlocks.push(
+ toolUseBlock({ name: block.name, arguments: block.arguments, id: block.id }),
+ );
+ }
+ }
+ } else {
+ // LEGACY PATH (unchanged): text content block, then toolUse blocks in
+ // `toolCalls` order.
+ contentBlocks.push({ text: content });
+ for (const tc of toolCalls) {
+ contentBlocks.push(toolUseBlock(tc));
+ }
}
return {
@@ -767,11 +892,12 @@ export async function handleConverse(
response: { status: 200, fixture },
});
const body = buildConverseContentWithToolCallsResponse(
- response.content,
- response.toolCalls,
+ response.content ?? "",
+ response.toolCalls ?? [],
logger,
effReasoning,
overrides,
+ response.blocks,
);
res.writeHead(200, { "Content-Type": "application/json" });
res.end(JSON.stringify(body));
@@ -1085,12 +1211,13 @@ export async function handleConverseStream(
response: { status: 200, fixture },
});
const events = buildBedrockStreamContentWithToolCallsEvents(
- response.content,
- response.toolCalls,
+ response.content ?? "",
+ response.toolCalls ?? [],
chunkSize,
logger,
effReasoning,
overrides,
+ response.blocks,
);
const interruption = createInterruptionSignal(fixture);
const completed = await writeEventStream(res, events, {
diff --git a/src/bedrock.ts b/src/bedrock.ts
index 1c83c49f..8b285967 100644
--- a/src/bedrock.ts
+++ b/src/bedrock.ts
@@ -22,6 +22,7 @@ import type {
ChatCompletionRequest,
ChatMessage,
Fixture,
+ FixtureBlock,
HandlerDefaults,
ResponseOverrides,
ToolCall,
@@ -39,6 +40,7 @@ import {
getContext,
getTestId,
resolveResponse,
+ resolveFixtureBlocks,
resolveReasoningForModel,
resolveStrictMode,
strictOverrideField,
@@ -333,6 +335,64 @@ function buildBedrockToolCallResponse(
};
}
+/**
+ * Assemble the non-streaming Bedrock-invoke (Anthropic Messages-shaped) body
+ * for a fixture that supplies ordered `blocks`.
+ *
+ * The `content[]` entries follow the order returned by `resolveFixtureBlocks`:
+ * a `text` block becomes a `{ type: "text" }` entry and any other block becomes
+ * a `tool_use` entry, so a tool call listed ahead of text is emitted ahead of
+ * it. When `reasoning` is set, a `thinking` entry (with an empty `signature`)
+ * is placed before all of them.
+ *
+ * NOTE(review): `"tool_use"` is always passed as the second argument to
+ * `bedrockStopReason`, even if `blocks` holds no tool call — confirm callers
+ * only route tool-bearing fixtures here.
+ *
+ * @param blocks - Fixture blocks to render; the visible caller passes a non-empty array.
+ * @param model - Model name reported unless `overrides.model` is set.
+ * @param logger - Receives a warning when a tool call's `arguments` is not valid JSON.
+ * @param reasoning - Optional thinking text.
+ * @param overrides - Optional overrides; supplies `id` / `model` and is forwarded
+ *   to `bedrockStopReason` (finish reason) and `bedrockUsage`.
+ * @returns The response body object.
+ */
+function buildBedrockBlocksResponse(
+  blocks: FixtureBlock[],
+  model: string,
+  logger: Logger,
+  reasoning?: string,
+  overrides?: ResponseOverrides,
+): object {
+  // Tool-call `arguments` is a JSON string: an empty string is read as `{}`,
+  // and unparseable input is logged and replaced by `{}`.
+  const parseToolInput = (name: string, rawArguments: string): unknown => {
+    try {
+      return JSON.parse(rawArguments || "{}");
+    } catch {
+      logger.warn(
+        `Malformed JSON in fixture tool call arguments for "${name}": ${rawArguments}`,
+      );
+      return {};
+    }
+  };
+
+  const thinkingEntries: object[] = reasoning
+    ? [{ type: "thinking", thinking: reasoning, signature: "" }]
+    : [];
+
+  const orderedEntries = resolveFixtureBlocks(blocks).map((block): object => {
+    if (block.type === "text") {
+      return { type: "text", text: block.text };
+    }
+    // Parse first, then fall back to a generated id only when the block has none.
+    const input = parseToolInput(block.name, block.arguments);
+    return {
+      type: "tool_use",
+      id: block.id || generateToolUseId(),
+      name: block.name,
+      input,
+    };
+  });
+
+  return {
+    id: overrides?.id ?? generateMessageId(),
+    type: "message",
+    role: "assistant",
+    content: [...thinkingEntries, ...orderedEntries],
+    model: overrides?.model ?? model,
+    stop_reason: bedrockStopReason(overrides?.finishReason, "tool_use"),
+    stop_sequence: null,
+    usage: bedrockUsage(overrides),
+  };
+}
+
// ─── Request handler ────────────────────────────────────────────────────────
export async function handleBedrock(
@@ -565,14 +625,28 @@ export async function handleBedrock(
body: completionReq,
response: { status: 200, fixture },
});
+ if (response.blocks && response.blocks.length > 0) {
+ // NEW PATH: honor the fixture's ordered `blocks` in the positionally
+ // observable Anthropic-style content array (tool-first expressible).
+ const blocksBody = buildBedrockBlocksResponse(
+ response.blocks,
+ completionReq.model,
+ logger,
+ effReasoning,
+ overrides,
+ );
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(JSON.stringify(blocksBody));
+ return;
+ }
const textBody = buildBedrockTextResponse(
- response.content,
+ response.content ?? "",
completionReq.model,
effReasoning,
overrides,
);
const toolBody = buildBedrockToolCallResponse(
- response.toolCalls,
+ response.toolCalls ?? [],
completionReq.model,
logger,
// Reasoning is rendered by the text response in this merged path; pass
@@ -829,6 +903,7 @@ export function buildBedrockStreamContentWithToolCallsEvents(
logger: Logger,
reasoning?: string,
overrides?: ResponseOverrides,
+ blocks?: FixtureBlock[],
): Array<{ eventType: string; payload: object }> {
const events: Array<{ eventType: string; payload: object }> = [];
@@ -836,7 +911,8 @@ export function buildBedrockStreamContentWithToolCallsEvents(
let blockIndex = 0;
- // Thinking block (emitted before text when reasoning is present)
+ // Thinking block (emitted before text/tool_use when reasoning is present);
+ // shared by both the legacy and the ordered-`blocks` paths.
if (reasoning) {
events.push({
eventType: BEDROCK_INVOKE_STREAM_EVENT_TYPE,
@@ -872,6 +948,85 @@ export function buildBedrockStreamContentWithToolCallsEvents(
blockIndex++;
}
+ if (blocks && blocks.length > 0) {
+ // NEW PATH: emit `text`/`tool_use` content_block events in the fixture's
+ // array order. Bedrock-invoke binary `content_block_*` events are
+ // positional, so a `toolCall` block can take a lower `index` than a `text`
+ // block (tool-first is wire-expressible). Indices continue from any leading
+ // thinking block above.
+ const ordered = resolveFixtureBlocks(blocks);
+ for (const block of ordered) {
+ if (block.type === "text") {
+ events.push({
+ eventType: BEDROCK_INVOKE_STREAM_EVENT_TYPE,
+ payload: {
+ type: "content_block_start",
+ index: blockIndex,
+ content_block: { type: "text", text: "" },
+ },
+ });
+ for (let i = 0; i < block.text.length; i += chunkSize) {
+ const slice = block.text.slice(i, i + chunkSize);
+ events.push({
+ eventType: BEDROCK_INVOKE_STREAM_EVENT_TYPE,
+ payload: {
+ type: "content_block_delta",
+ index: blockIndex,
+ delta: { type: "text_delta", text: slice },
+ },
+ });
+ }
+ events.push({
+ eventType: BEDROCK_INVOKE_STREAM_EVENT_TYPE,
+ payload: { type: "content_block_stop", index: blockIndex },
+ });
+ blockIndex++;
+ } else {
+ const toolUseId = block.id || generateToolUseId();
+ events.push({
+ eventType: BEDROCK_INVOKE_STREAM_EVENT_TYPE,
+ payload: {
+ type: "content_block_start",
+ index: blockIndex,
+ content_block: {
+ type: "tool_use",
+ id: toolUseId,
+ name: block.name,
+ input: {},
+ },
+ },
+ });
+ const argsStr = parseToolArgumentsForStream(
+ { name: block.name, arguments: block.arguments } as ToolCall,
+ logger,
+ );
+ for (let i = 0; i < argsStr.length; i += chunkSize) {
+ const slice = argsStr.slice(i, i + chunkSize);
+ events.push({
+ eventType: BEDROCK_INVOKE_STREAM_EVENT_TYPE,
+ payload: {
+ type: "content_block_delta",
+ index: blockIndex,
+ delta: { type: "input_json_delta", partial_json: slice },
+ },
+ });
+ }
+ events.push({
+ eventType: BEDROCK_INVOKE_STREAM_EVENT_TYPE,
+ payload: { type: "content_block_stop", index: blockIndex },
+ });
+ blockIndex++;
+ }
+ }
+
+ events.push(
+ buildBedrockInvokeMessageDelta(bedrockStopReason(overrides?.finishReason, "tool_use")),
+ );
+ events.push(buildBedrockInvokeMessageStop());
+ return events;
+ }
+
+ // LEGACY PATH (unchanged): text block, then tool_use blocks in toolCalls order.
// Text block
events.push({
eventType: BEDROCK_INVOKE_STREAM_EVENT_TYPE,
@@ -1279,13 +1434,14 @@ export async function handleBedrockStream(
response: { status: 200, fixture },
});
const events = buildBedrockStreamContentWithToolCallsEvents(
- response.content,
- response.toolCalls,
+ response.content ?? "",
+ response.toolCalls ?? [],
completionReq.model,
chunkSize,
logger,
effReasoning,
overrides,
+ response.blocks,
);
const interruption = createInterruptionSignal(fixture);
const completed = await writeEventStream(res, events, {
diff --git a/src/cohere.ts b/src/cohere.ts
index 75949f30..a40e679c 100644
--- a/src/cohere.ts
+++ b/src/cohere.ts
@@ -14,6 +14,7 @@ import type {
ChatCompletionRequest,
ChatMessage,
Fixture,
+ FixtureBlock,
HandlerDefaults,
RecordedTimings,
ResponseOverrides,
@@ -34,6 +35,7 @@ import {
serializeErrorResponse,
flattenHeaders,
getTestId,
+ resolveFixtureBlocks,
resolveResponse,
resolveStrictMode,
resolveReasoningForModel,
@@ -316,8 +318,27 @@ function buildCohereContentWithToolCallsResponse(
logger: Logger,
reasoning?: string,
overrides?: ResponseOverrides,
+ blocks?: FixtureBlock[],
): object {
- const cohereCalls = toolCalls.map((tc) => {
+ // Cohere's non-streaming response keeps text in `message.content[]` and tool
+ // calls in the SEPARATE `message.tool_calls[]` field, so the relative ORDER
+ // of a text vs. toolCall block is NOT observable on the wire (unlike the
+ // ordered streaming events, or Anthropic/Gemini's single ordered array).
+ // When `blocks` is present we therefore derive both fields FROM the blocks
+ // (so a blocks-only fixture still produces correct output) but make no
+ // ordering guarantee between the two fields. Legacy fixtures use the
+ // `content` + `toolCalls` inputs unchanged.
+ // Resolve the blocks exactly once (pure: validate + copy, no id-gen) and
+ // reuse the single result for BOTH the tool-call and text derivation below.
+ const resolvedBlocks = blocks && blocks.length > 0 ? resolveFixtureBlocks(blocks) : undefined;
+
+ const effectiveToolCalls: ToolCall[] = resolvedBlocks
+ ? resolvedBlocks
+ .filter((b): b is Extract<FixtureBlock, { type: "toolCall" }> => b.type === "toolCall")
+ .map((b) => ({ name: b.name, arguments: b.arguments, id: b.id }))
+ : toolCalls;
+
+ const cohereCalls = effectiveToolCalls.map((tc) => {
let argsJson: string;
try {
JSON.parse(tc.arguments || "{}");
@@ -338,11 +359,25 @@ function buildCohereContentWithToolCallsResponse(
};
});
+ // For the blocks path, derive text only from actual text blocks. A tool-only
+ // blocks fixture has no text block, so it must NOT emit a spurious empty
+ // `{ type: "text", text: "" }` entry (real Cohere wouldn't). The legacy
+ // (no-blocks) path is unchanged: it always emits the `content` text entry.
+ const textBlocks = resolvedBlocks?.filter(
+ (b): b is Extract<FixtureBlock, { type: "text" }> => b.type === "text",
+ );
+ const hasTextEntry = resolvedBlocks ? (textBlocks?.length ?? 0) > 0 : true;
+ const effectiveContent: string = resolvedBlocks
+ ? (textBlocks ?? []).map((b) => b.text).join("")
+ : content;
+
const contentBlocks: { type: string; text: string }[] = [];
if (reasoning) {
contentBlocks.push({ type: "text", text: reasoning });
}
- contentBlocks.push({ type: "text", text: content });
+ if (hasTextEntry) {
+ contentBlocks.push({ type: "text", text: effectiveContent });
+ }
return {
id: overrides?.id ?? generateMessageId(),
@@ -579,6 +614,7 @@ function buildCohereContentWithToolCallsStreamEvents(
logger: Logger,
reasoning?: string,
overrides?: ResponseOverrides,
+ blocks?: FixtureBlock[],
): CohereSSEEvent[] {
const msgId = overrides?.id ?? generateMessageId();
const events: CohereSSEEvent[] = [];
@@ -619,6 +655,90 @@ function buildCohereContentWithToolCallsStreamEvents(
contentIndex++;
}
+ if (blocks && blocks.length > 0) {
+ // NEW path (#274): emit Cohere SSE events in the blocks' ARRAY ORDER so a
+ // tool-first / interleaved fixture streams its tool call before its text.
+ // Cohere v2 events are ordered, so tool-first is wire-expressible. The
+ // tool-plan-delta is emitted once before the first toolCall block (Cohere
+ // requires it preceding tool calls). Legacy fixtures (no blocks) skip this.
+ const resolved = resolveFixtureBlocks(blocks);
+ let toolPlanEmitted = false;
+ let toolIdx = 0;
+ resolved.forEach((block) => {
+ if (block.type === "toolCall") {
+ if (!toolPlanEmitted) {
+ events.push({
+ type: "tool-plan-delta",
+ delta: { message: { tool_plan: "I will use the requested tool." } },
+ });
+ toolPlanEmitted = true;
+ }
+ const callId = block.id || generateToolCallId();
+ let argsJson: string;
+ try {
+ JSON.parse(block.arguments || "{}");
+ argsJson = block.arguments || "{}";
+ } catch {
+ logger.warn(
+ `Malformed JSON in fixture tool call arguments for "${block.name}": ${block.arguments}`,
+ );
+ argsJson = "{}";
+ }
+ events.push({
+ type: "tool-call-start",
+ index: toolIdx,
+ delta: {
+ message: {
+ tool_calls: {
+ id: callId,
+ type: "function",
+ function: { name: block.name, arguments: "" },
+ },
+ },
+ },
+ });
+ for (let i = 0; i < argsJson.length; i += chunkSize) {
+ events.push({
+ type: "tool-call-delta",
+ index: toolIdx,
+ delta: {
+ message: {
+ tool_calls: { function: { arguments: argsJson.slice(i, i + chunkSize) } },
+ },
+ },
+ });
+ }
+ events.push({ type: "tool-call-end", index: toolIdx });
+ toolIdx++;
+ } else {
+ events.push({
+ type: "content-start",
+ index: contentIndex,
+ delta: { message: { content: { type: "text" } } },
+ });
+ for (let i = 0; i < block.text.length; i += chunkSize) {
+ events.push({
+ type: "content-delta",
+ index: contentIndex,
+ delta: {
+ message: { content: { type: "text", text: block.text.slice(i, i + chunkSize) } },
+ },
+ });
+ }
+ events.push({ type: "content-end", index: contentIndex });
+ contentIndex++;
+ }
+ });
+ events.push({
+ type: "message-end",
+ delta: {
+ finish_reason: cohereFinishReason(overrides?.finishReason, "TOOL_CALL"),
+ usage: cohereUsage(overrides),
+ },
+ });
+ return events;
+ }
+
// content-start (type: "text" only, no text field)
events.push({
type: "content-start",
@@ -1026,22 +1146,24 @@ export async function handleCohere(
});
if (cohereReq.stream !== true) {
const body = buildCohereContentWithToolCallsResponse(
- response.content,
- response.toolCalls,
+ response.content ?? "",
+ response.toolCalls ?? [],
logger,
effReasoning,
overrides,
+ response.blocks,
);
res.writeHead(200, { "Content-Type": "application/json" });
res.end(JSON.stringify(body));
} else {
const events = buildCohereContentWithToolCallsStreamEvents(
- response.content,
- response.toolCalls,
+ response.content ?? "",
+ response.toolCalls ?? [],
chunkSize,
logger,
effReasoning,
overrides,
+ response.blocks,
);
const interruption = createInterruptionSignal(fixture);
const completed = await writeCohereSSEStream(res, events, {
diff --git a/src/fixture-loader.ts b/src/fixture-loader.ts
index 498749e6..9d911fcb 100644
--- a/src/fixture-loader.ts
+++ b/src/fixture-loader.ts
@@ -277,7 +277,7 @@ function validateWebSearches(
}
function validateBlocks(
- response: { blocks?: unknown },
+ response: { blocks?: unknown; content?: unknown; toolCalls?: unknown },
fixtureIndex: number,
results: ValidationResult[],
): void {
@@ -321,6 +321,14 @@ function validateBlocks(
fixtureIndex,
message: `blocks[${j}].text must be a string, got ${typeof block.text}`,
});
+ } else if (block.text === "") {
+ // Mirror the content/toolCalls "empty string" rejection: an empty-text
+ // block produces a meaningless/spurious wire chunk on replay.
+ results.push({
+ severity: "error",
+ fixtureIndex,
+ message: `blocks[${j}].text is empty string`,
+ });
}
} else {
// toolCall block — mirror toolCalls[] name + arguments checks.
@@ -359,6 +367,64 @@ function validateBlocks(
}
}
}
+
+ // blocks-vs-content/toolCalls divergence (#274 P2). When a fixture carries
+ // BOTH `blocks` AND legacy `content`/`toolCalls` (allowed but unusual now
+ // that blocks-only is first-class), builders stream `blocks` and IGNORE the
+ // redundant `content`/`toolCalls`. If those disagree it is a silent footgun,
+ // so WARN (not a hard error). Stay silent on the clean blocks-only path
+ // (neither legacy field present) — that is the intended shape.
+ const hasLegacyContent = typeof response.content === "string";
+ const hasLegacyToolCalls = Array.isArray(response.toolCalls);
+ if ((hasLegacyContent || hasLegacyToolCalls) && Array.isArray(response.blocks)) {
+ const textBlocks = response.blocks.filter(
+ (b): b is { type: "text"; text: string } =>
+ b != null &&
+ typeof b === "object" &&
+ (b as { type?: unknown }).type === "text" &&
+ typeof (b as { text?: unknown }).text === "string",
+ );
+ const toolCallBlockNames = response.blocks
+ .filter(
+ (b): b is { type: "toolCall"; name: string } =>
+ b != null &&
+ typeof b === "object" &&
+ (b as { type?: unknown }).type === "toolCall" &&
+ typeof (b as { name?: unknown }).name === "string",
+ )
+ .map((b) => b.name);
+
+ // Text divergence: blocks' concatenated text vs legacy `content`.
+ if (hasLegacyContent) {
+ const blocksText = textBlocks.map((b) => b.text).join("");
+ if (blocksText !== response.content) {
+ results.push({
+ severity: "warning",
+ fixtureIndex,
+ message:
+ "blocks text diverges from content — builders stream blocks and ignore the redundant content field",
+ });
+ }
+ }
+
+ // ToolCall divergence: blocks' ordered toolCall names vs legacy `toolCalls`.
+ if (hasLegacyToolCalls) {
+ const legacyNames = (response.toolCalls as Array<{ name?: unknown }>).map((tc) =>
+ typeof tc?.name === "string" ? tc.name : undefined,
+ );
+ const sameNames =
+ legacyNames.length === toolCallBlockNames.length &&
+ legacyNames.every((n, k) => n === toolCallBlockNames[k]);
+ if (!sameNames) {
+ results.push({
+ severity: "warning",
+ fixtureIndex,
+ message:
+ "blocks toolCalls diverge from toolCalls — builders stream blocks and ignore the redundant toolCalls field",
+ });
+ }
+ }
+ }
}
export function validateFixtures(fixtures: Fixture[]): ValidationResult[] {
@@ -400,9 +466,19 @@ export function validateFixtures(fixtures: Fixture[]): ValidationResult[] {
});
}
+ // When a non-empty ordered `blocks` array is present, the builders stream
+ // `blocks` and IGNORE the legacy `content` mirror (see validateBlocks's
+ // divergence note + isContentWithToolCallsResponse's BLOCKS-ONLY clause).
+ // So an empty-string `content` is harmless in that case and must NOT raise
+ // the "content is empty string" hard error. Fixtures WITHOUT blocks keep
+ // the error (an empty content with no blocks produces no output).
+ const hasNonEmptyBlocks =
+ Array.isArray((response as { blocks?: unknown }).blocks) &&
+ (response as { blocks: unknown[] }).blocks.length > 0;
+
// Text response checks
if (isTextResponse(response)) {
- if (response.content === "") {
+ if (response.content === "" && !hasNonEmptyBlocks) {
results.push({
severity: "error",
fixtureIndex: i,
@@ -415,38 +491,48 @@ export function validateFixtures(fixtures: Fixture[]): ValidationResult[] {
// ContentWithToolCalls response checks
if (isContentWithToolCallsResponse(response)) {
- if (response.content === "") {
- results.push({
- severity: "error",
- fixtureIndex: i,
- message: "content is empty string",
- });
- }
- if (response.toolCalls.length === 0) {
- results.push({
- severity: "warning",
- fixtureIndex: i,
- message: "toolCalls array is empty — fixture will never produce tool calls",
- });
- }
- for (let j = 0; j < response.toolCalls.length; j++) {
- const tc = response.toolCalls[j];
- if (!tc.name) {
+ // The guard now also matches a BLOCKS-ONLY fixture (non-empty `blocks`,
+ // no `content`/`toolCalls`). For that shape the content/toolCalls checks
+ // below don't apply (and `content`/`toolCalls` are undefined) — the
+ // ordered `blocks` array is validated separately by `validateBlocks`
+ // immediately after this block. So gate the legacy field checks on the
+ // fields actually being present, mirroring the builders' branch-on-blocks.
+ if (typeof response.content === "string") {
+ if (response.content === "" && !hasNonEmptyBlocks) {
results.push({
severity: "error",
fixtureIndex: i,
- message: `toolCalls[${j}].name is empty`,
+ message: "content is empty string",
});
}
- try {
- JSON.parse(tc.arguments);
- } catch {
+ }
+ if (Array.isArray(response.toolCalls)) {
+ if (response.toolCalls.length === 0) {
results.push({
- severity: "error",
+ severity: "warning",
fixtureIndex: i,
- message: `toolCalls[${j}].arguments is not valid JSON: ${tc.arguments}`,
+ message: "toolCalls array is empty — fixture will never produce tool calls",
});
}
+ for (let j = 0; j < response.toolCalls.length; j++) {
+ const tc = response.toolCalls[j];
+ if (!tc.name) {
+ results.push({
+ severity: "error",
+ fixtureIndex: i,
+ message: `toolCalls[${j}].name is empty`,
+ });
+ }
+ try {
+ JSON.parse(tc.arguments);
+ } catch {
+ results.push({
+ severity: "error",
+ fixtureIndex: i,
+ message: `toolCalls[${j}].arguments is not valid JSON: ${tc.arguments}`,
+ });
+ }
+ }
}
validateReasoning(response, i, results);
validateWebSearches(response, i, results);
@@ -455,7 +541,11 @@ export function validateFixtures(fixtures: Fixture[]): ValidationResult[] {
// Optional ordered `blocks` checks — validated whenever present on the
// response, regardless of which content/toolCalls guard matched, so a
// malformed blocks array is rejected at LOAD rather than at dispatch.
- validateBlocks(response as { blocks?: unknown }, i, results);
+ validateBlocks(
+ response as { blocks?: unknown; content?: unknown; toolCalls?: unknown },
+ i,
+ results,
+ );
// Tool call response checks
if (isToolCallResponse(response)) {
diff --git a/src/gemini-interactions.ts b/src/gemini-interactions.ts
index 833b558c..7df60576 100644
--- a/src/gemini-interactions.ts
+++ b/src/gemini-interactions.ts
@@ -12,6 +12,7 @@ import type {
ChatCompletionRequest,
ChatMessage,
Fixture,
+ FixtureBlock,
HandlerDefaults,
RecordedTimings,
ResponseOverrides,
@@ -34,6 +35,7 @@ import {
strictOverrideField,
strictNoMatchMessage,
strictNoMatchLogLine,
+ resolveFixtureBlocks,
} from "./helpers.js";
import { matchFixtureDiagnostic } from "./router.js";
import { writeErrorResponse, delay, calculateDelay } from "./sse-writer.js";
@@ -385,10 +387,41 @@ export function buildInteractionsContentWithToolCallsResponse(
interactionId: string,
logger: Logger,
overrides?: ResponseOverrides,
+ blocks?: FixtureBlock[],
): object {
- const steps: object[] = [{ type: "model_output", content: [{ type: "text", text: content }] }];
- for (const tc of toolCalls) {
- steps.push(buildFunctionCallStep(tc, logger));
+ const steps: object[] = [];
+ // Collect output_text in step order so the top-level field mirrors the
+ // concatenated text steps regardless of where they appear in `steps`.
+ let outputText = "";
+
+ if (blocks && blocks.length > 0) {
+ // NEW PATH: the non-stream `steps[]` array is index/step-addressed and
+ // ordered, so emit one step per block in fixture ARRAY ORDER. A toolCall
+ // block placed before a text block therefore yields a function_call step
+ // ahead of the model_output step — tool-first, the opposite of the legacy
+ // (text-step-always-first) shape below.
+ const ordered = resolveFixtureBlocks(blocks);
+ for (const block of ordered) {
+ if (block.type === "text") {
+ steps.push({ type: "model_output", content: [{ type: "text", text: block.text }] });
+ outputText += block.text;
+ } else {
+ steps.push(
+ buildFunctionCallStep(
+ { name: block.name, arguments: block.arguments, id: block.id },
+ logger,
+ ),
+ );
+ }
+ }
+ } else {
+ // LEGACY PATH: a single text step first, then function_call steps —
+ // unchanged from the pre-blocks behavior (text always leads `steps`).
+ steps.push({ type: "model_output", content: [{ type: "text", text: content }] });
+ outputText = content;
+ for (const tc of toolCalls) {
+ steps.push(buildFunctionCallStep(tc, logger));
+ }
}
return {
@@ -396,7 +429,7 @@ export function buildInteractionsContentWithToolCallsResponse(
status: "requires_action",
model: overrides?.model ?? model,
role: "model",
- output_text: content,
+ output_text: outputText,
steps,
usage: interactionsUsage(overrides),
};
@@ -570,27 +603,18 @@ export function buildInteractionsToolCallSSEEvents(
return events;
}
-export function buildInteractionsContentWithToolCallsSSEEvents(
+// Emit the step.start/delta(s)/stop bracket for a text (model_output) step at
+// a given step `index`. Inner delta shape ({ type: "text", text }) and the
+// empty-content single-empty-delta behavior are unchanged from the legacy path.
+function pushTextStepEvents(
+ events: InteractionsSSEEvent[],
+ index: number,
content: string,
- toolCalls: ToolCall[],
- interactionId: string,
chunkSize: number,
- logger: Logger,
- overrides?: ResponseOverrides,
-): InteractionsSSEEvent[] {
- const events: InteractionsSSEEvent[] = [];
-
- // interaction.created
- events.push({
- event_type: "interaction.created",
- interaction: { id: interactionId, status: "in_progress" },
- event_id: nextEventId(),
- });
-
- // Text content at index 0 (model_output step)
+): void {
events.push({
event_type: "step.start",
- index: 0,
+ index,
step: { type: "model_output" },
event_id: nextEventId(),
});
@@ -598,7 +622,7 @@ export function buildInteractionsContentWithToolCallsSSEEvents(
if (content.length === 0) {
events.push({
event_type: "step.delta",
- index: 0,
+ index,
delta: { type: "text", text: "" },
event_id: nextEventId(),
});
@@ -607,7 +631,7 @@ export function buildInteractionsContentWithToolCallsSSEEvents(
const slice = content.slice(i, i + chunkSize);
events.push({
event_type: "step.delta",
- index: 0,
+ index,
delta: { type: "text", text: slice },
event_id: nextEventId(),
});
@@ -616,51 +640,103 @@ export function buildInteractionsContentWithToolCallsSSEEvents(
events.push({
event_type: "step.stop",
- index: 0,
+ index,
event_id: nextEventId(),
});
+}
- // Tool calls at index 1+ (identity on step.start, args as arguments_delta)
- for (let i = 0; i < toolCalls.length; i++) {
- const tc = toolCalls[i];
- const idx = i + 1; // offset by 1 because text is index 0
- let argsObj: unknown;
- try {
- argsObj = JSON.parse(tc.arguments || "{}");
- } catch {
- logger.warn(
- `Malformed JSON in fixture tool call arguments for "${tc.name}": ${tc.arguments}`,
- );
- argsObj = {};
- }
+// Emit the step.start/arguments_delta/stop bracket for a function_call step at
+// a given step `index`. Identity (id, name) lives on step.start with an empty
+// `arguments: {}` placeholder; the arguments stream as a single
+// `arguments_delta` JSON-string fragment — unchanged from the legacy path.
+function pushFunctionCallStepEvents(
+ events: InteractionsSSEEvent[],
+ index: number,
+ tc: ToolCall,
+ logger: Logger,
+): void {
+ let argsObj: unknown;
+ try {
+ argsObj = JSON.parse(tc.arguments || "{}");
+ } catch {
+ logger.warn(`Malformed JSON in fixture tool call arguments for "${tc.name}": ${tc.arguments}`);
+ argsObj = {};
+ }
- events.push({
- event_type: "step.start",
- index: idx,
- step: {
- type: "function_call",
- id: tc.id || generateToolCallId(),
- name: tc.name,
- arguments: {},
- },
- event_id: nextEventId(),
- });
+ events.push({
+ event_type: "step.start",
+ index,
+ step: {
+ type: "function_call",
+ id: tc.id || generateToolCallId(),
+ name: tc.name,
+ arguments: {},
+ },
+ event_id: nextEventId(),
+ });
- events.push({
- event_type: "step.delta",
- index: idx,
- delta: {
- type: "arguments_delta",
- arguments: JSON.stringify(argsObj),
- },
- event_id: nextEventId(),
- });
+ events.push({
+ event_type: "step.delta",
+ index,
+ delta: {
+ type: "arguments_delta",
+ arguments: JSON.stringify(argsObj),
+ },
+ event_id: nextEventId(),
+ });
- events.push({
- event_type: "step.stop",
- index: idx,
- event_id: nextEventId(),
- });
+ events.push({
+ event_type: "step.stop",
+ index,
+ event_id: nextEventId(),
+ });
+}
+
+export function buildInteractionsContentWithToolCallsSSEEvents(
+ content: string,
+ toolCalls: ToolCall[],
+ interactionId: string,
+ chunkSize: number,
+ logger: Logger,
+ overrides?: ResponseOverrides,
+ blocks?: FixtureBlock[],
+): InteractionsSSEEvent[] {
+ const events: InteractionsSSEEvent[] = [];
+
+ // interaction.created
+ events.push({
+ event_type: "interaction.created",
+ interaction: { id: interactionId, status: "in_progress" },
+ event_id: nextEventId(),
+ });
+
+ if (blocks && blocks.length > 0) {
+ // NEW PATH: stream one step per block in fixture ARRAY ORDER. The step
+ // `index` increments with array position, so a toolCall block before a text
+ // block yields a function_call step at a LOWER index than the model_output
+ // step — tool-first, the opposite of the legacy (text-at-index-0) shape.
+ const ordered = resolveFixtureBlocks(blocks);
+ let idx = 0;
+ for (const block of ordered) {
+ if (block.type === "text") {
+ pushTextStepEvents(events, idx, block.text, chunkSize);
+ } else {
+ pushFunctionCallStepEvents(
+ events,
+ idx,
+ { name: block.name, arguments: block.arguments, id: block.id },
+ logger,
+ );
+ }
+ idx += 1;
+ }
+ } else {
+ // LEGACY PATH: text content at index 0 (model_output step), tool calls at
+ // index 1+ — byte-for-byte unchanged from the pre-blocks behavior.
+ pushTextStepEvents(events, 0, content, chunkSize);
+ for (let i = 0; i < toolCalls.length; i++) {
+ pushFunctionCallStepEvents(events, i + 1, toolCalls[i], logger);
+ }
}
// interaction.completed
@@ -930,23 +1006,25 @@ export async function handleGeminiInteractions(
});
if (!streaming) {
const body = buildInteractionsContentWithToolCallsResponse(
- response.content,
- response.toolCalls,
+ response.content ?? "",
+ response.toolCalls ?? [],
model,
interactionId,
logger,
overrides,
+ response.blocks,
);
res.writeHead(200, { "Content-Type": "application/json" });
res.end(JSON.stringify(body));
} else {
const events = buildInteractionsContentWithToolCallsSSEEvents(
- response.content,
- response.toolCalls,
+ response.content ?? "",
+ response.toolCalls ?? [],
interactionId,
chunkSize,
logger,
overrides,
+ response.blocks,
);
const interruption = createInterruptionSignal(fixture);
const completed = await writeGeminiInteractionsSSEStream(res, events, {
diff --git a/src/gemini.ts b/src/gemini.ts
index b7b349b0..890d761b 100644
--- a/src/gemini.ts
+++ b/src/gemini.ts
@@ -1001,8 +1001,8 @@ export async function handleGemini(
});
if (!streaming) {
const body = buildGeminiContentWithToolCallsResponse(
- response.content,
- response.toolCalls,
+ response.content ?? "",
+ response.toolCalls ?? [],
logger,
effReasoning,
overrides,
@@ -1012,8 +1012,8 @@ export async function handleGemini(
res.end(JSON.stringify(body));
} else {
const chunks = buildGeminiContentWithToolCallsStreamChunks(
- response.content,
- response.toolCalls,
+ response.content ?? "",
+ response.toolCalls ?? [],
chunkSize,
logger,
effReasoning,
diff --git a/src/helpers.ts b/src/helpers.ts
index 40af7402..3f5f7a5b 100644
--- a/src/helpers.ts
+++ b/src/helpers.ts
@@ -22,6 +22,7 @@ import type {
SSEChunk,
ToolCall,
FixtureBlock,
+ FixtureFileBlock,
ChatCompletion,
ResponseOverrides,
} from "./types.js";
@@ -289,12 +290,25 @@ export function isToolCallResponse(r: FixtureResponse): r is ToolCallResponse {
export function isContentWithToolCallsResponse(
r: FixtureResponse,
): r is ContentWithToolCallsResponse {
- return (
+ const o = r as ContentWithToolCallsResponse;
+ // LEGACY / COMBINED shape — BOTH content (string) + toolCalls (array). This
+ // clause is byte-identical to the original guard, so every fixture that
+ // matched before still matches here and is classified exactly as before.
+ const hasContentAndToolCalls =
"content" in r &&
- typeof (r as ContentWithToolCallsResponse).content === "string" &&
+ typeof o.content === "string" &&
"toolCalls" in r &&
- Array.isArray((r as ContentWithToolCallsResponse).toolCalls)
- );
+ Array.isArray(o.toolCalls);
+ // BLOCKS-ONLY shape (additive, #274 F0) — a non-empty `blocks` array with no
+ // content/toolCalls. This is a pure RELAXATION: it recognizes MORE, never
+ // reclassifies an existing fixture. A blocks-only fixture cannot be claimed by
+ // any earlier/looser guard in the dispatch order — `isTextResponse` requires a
+ // string `content` AND `!("toolCalls" in r)`, and `isToolCallResponse`
+ // requires a `toolCalls` array — so it would otherwise fall through to 500.
+ // `isAudioResponse` (checked first everywhere) requires an `audio` field, which
+ // blocks-only lacks, so there is no overlap there either.
+ const hasNonEmptyBlocks = Array.isArray(o.blocks) && o.blocks.length > 0;
+ return hasContentAndToolCalls || hasNonEmptyBlocks;
}
/**
@@ -310,18 +324,32 @@ export function isContentWithToolCallsResponse(
* is the single source of truth for "has blocks". This validator therefore only
* ever runs on a non-empty array.
*
- * Returns the blocks in array order. Each entry must be a valid
- * {@link FixtureBlock}: a `text` block with a string `text`, or a `toolCall`
- * block with string `name` + `arguments` (and an optional string `id`).
- * Throws on a malformed array or entry — same fail-fast idiom as the other
- * fixture validators in this module (see e.g. the factory guard at
- * {@link resolveResponse}).
+ * Accepts the relaxed on-disk {@link FixtureFileBlock} input shape — a
+ * `toolCall` block's `arguments` may be a string OR a JSON object/array — which
+ * makes the object-tolerance below type-visible and mirrors how
+ * normalizeResponse types its file-form input (see {@link FixtureFileBlock} /
+ * {@link FixtureFileContentWithToolCallsResponse}). The in-memory
+ * {@link FixtureBlock} form (string `arguments`) is a structural subtype, so
+ * existing callers that pass `FixtureBlock[]` continue to type-check.
+ *
+ * Returns the blocks in array order, NORMALIZED to {@link FixtureBlock}: a
+ * `text` block with a string `text`, or a `toolCall` block with string `name` +
+ * string `arguments` (object/array `arguments` is JSON.stringified) and an
+ * optional string `id`. The return type guarantees `arguments: string` — every
+ * caller relies on that. Throws on a malformed array or entry — same fail-fast
+ * idiom as the other fixture validators in this module (see e.g. the factory
+ * guard at {@link resolveResponse}).
*/
-export function resolveFixtureBlocks(blocks: FixtureBlock[]): FixtureBlock[] {
+export function resolveFixtureBlocks(blocks: FixtureFileBlock[]): FixtureBlock[] {
if (!Array.isArray(blocks)) {
throw new Error(`Invalid fixture blocks: expected an array, got ${typeof blocks}`);
}
- blocks.forEach((block, i) => {
+ // Validate each block and return a normalized COPY. Builders iterate the
+ // result and must not observe later mutations of — nor be able to mutate —
+ // the caller's stored fixture array, and block objects are consumed read-only
+ // downstream, so we never mutate the input in place: any normalization (e.g.
+ // stringifying object `arguments`) is applied to a fresh per-block copy.
+ return blocks.map((block, i) => {
if (block === null || typeof block !== "object") {
throw new Error(`Invalid fixture block at index ${i}: expected an object`);
}
@@ -332,10 +360,11 @@ export function resolveFixtureBlocks(blocks: FixtureBlock[]): FixtureBlock[] {
`Invalid fixture block at index ${i}: "text" block requires a string "text" field`,
);
}
+ return { type: "text", text: b.text };
} else if (b.type === "toolCall") {
- if (typeof b.name !== "string" || typeof b.arguments !== "string") {
+ if (typeof b.name !== "string") {
throw new Error(
- `Invalid fixture block at index ${i}: "toolCall" block requires string "name" and "arguments" fields`,
+ `Invalid fixture block at index ${i}: "toolCall" block requires a string "name" field`,
);
}
if (b.id !== undefined && typeof b.id !== "string") {
@@ -343,13 +372,29 @@ export function resolveFixtureBlocks(blocks: FixtureBlock[]): FixtureBlock[] {
`Invalid fixture block at index ${i}: "toolCall" block "id" must be a string when present`,
);
}
+ // `arguments` is a JSON string in normalized (file-load) form. The
+ // programmatic path (addFixture/addFixtures/prependFixture) stores RAW
+ // fixtures with no normalizeResponse pass, so an OBJECT `arguments` can
+ // reach here. Be tolerant: stringify an object/array (mirroring
+ // normalizeResponse's `JSON.stringify`) into a fresh block copy so the
+ // programmatic path is safe and the caller's stored fixture is untouched.
+ // A string stays byte-identical (file-load path unchanged); any other
+ // type is still rejected.
+ if (typeof b.arguments === "object" && b.arguments !== null) {
+ return { ...b, arguments: JSON.stringify(b.arguments) } as unknown as FixtureBlock;
+ }
+ if (typeof b.arguments !== "string") {
+ throw new Error(
+ `Invalid fixture block at index ${i}: "toolCall" block requires a string or object "arguments" field`,
+ );
+ }
+ return { ...b, type: "toolCall", name: b.name, arguments: b.arguments } as FixtureBlock;
} else {
throw new Error(
`Invalid fixture block at index ${i}: unknown type ${JSON.stringify(b.type)} (expected "text" or "toolCall")`,
);
}
});
- return blocks;
}
export function isErrorResponse(r: FixtureResponse): r is ErrorResponse {
diff --git a/src/messages.ts b/src/messages.ts
index c0fbc7bb..b3fcf149 100644
--- a/src/messages.ts
+++ b/src/messages.ts
@@ -1455,8 +1455,8 @@ export async function handleMessages(
});
if (claudeReq.stream !== true) {
const body = buildClaudeContentWithToolCallsResponse(
- response.content,
- response.toolCalls,
+ response.content ?? "",
+ response.toolCalls ?? [],
completionReq.model,
logger,
effReasoning,
@@ -1469,8 +1469,8 @@ export async function handleMessages(
res.end(JSON.stringify(body));
} else {
const events = buildClaudeContentWithToolCallsStreamEvents(
- response.content,
- response.toolCalls,
+ response.content ?? "",
+ response.toolCalls ?? [],
completionReq.model,
chunkSize,
logger,
diff --git a/src/ollama.ts b/src/ollama.ts
index 88a6b5c3..ac553382 100644
--- a/src/ollama.ts
+++ b/src/ollama.ts
@@ -492,20 +492,42 @@ function buildOllamaChatContentWithToolCallsChunks(
}
// NOTE (#274): this NON-streaming Ollama builder is intentionally degenerate
-// w.r.t. `blocks` ordering. Ollama's non-streaming chat response puts `content`
+// w.r.t. `blocks` ORDERING. Ollama's non-streaming chat response puts `content`
// and `tool_calls` in SEPARATE fields on a single `message` object — they are
// NOT a positionally-observable array, so a tool-first `blocks` fixture cannot
-// be expressed in the wire shape. Honoring block order here would be a no-op,
-// so we keep the legacy text+tool_calls fields unchanged. (Order-observable
-// surfaces — Claude `content[]`, Gemini `parts[]`, Responses `output[]` — DO
-// honor block order; see those builders.)
+// be expressed in the wire shape. Block ORDER is therefore a no-op here.
+// The PAYLOAD, however, is still derived from `blocks` when present (the body
+// backfills `content`/`tool_calls` from the blocks — see the comment at the
+// top of the function body); only the relative ordering of those fields is
+// unobservable. (Order-observable surfaces — Claude `content[]`, Gemini
+// `parts[]`, Responses `output[]` — DO honor block order; see those builders.)
function buildOllamaChatContentWithToolCallsResponse(
content: string,
toolCalls: ToolCall[],
model: string,
logger: Logger,
reasoning?: string,
+ blocks?: FixtureBlock[],
): object {
+ // Blocks-only / blocks-present fixtures: the non-streaming wire shape has no
+ // positional array, so order is a no-op here (see NOTE above). But the PAYLOAD
+ // must not be dropped: backfill `content` from text blocks (concatenated) and
+ // `tool_calls` from toolCall blocks, mirroring what the streaming path derives.
+ // Legacy (no-blocks) callers keep byte-identical output.
+ if (blocks && blocks.length > 0) {
+ const ordered = resolveFixtureBlocks(blocks);
+ content = ordered
+ .filter((b): b is { type: "text"; text: string } => b.type === "text")
+ .map((b) => b.text)
+ .join("");
+ toolCalls = ordered
+ .filter(
+ (b): b is { type: "toolCall"; name: string; arguments: string; id?: string } =>
+ b.type === "toolCall",
+ )
+ .map((b) => ({ name: b.name, arguments: b.arguments }));
+ }
+
const ollamaToolCalls = toolCalls.map((tc) => {
let argsObj: unknown;
try {
@@ -829,18 +851,19 @@ export async function handleOllama(
);
if (!streaming) {
const body = buildOllamaChatContentWithToolCallsResponse(
- response.content,
- response.toolCalls,
+ response.content ?? "",
+ response.toolCalls ?? [],
completionReq.model,
logger,
effReasoning,
+ response.blocks,
);
res.writeHead(200, { "Content-Type": "application/json" });
res.end(JSON.stringify(body));
} else {
const chunks = buildOllamaChatContentWithToolCallsChunks(
- response.content,
- response.toolCalls,
+ response.content ?? "",
+ response.toolCalls ?? [],
completionReq.model,
chunkSize,
logger,
diff --git a/src/responses.ts b/src/responses.ts
index 592f9ba5..5d0149ed 100644
--- a/src/responses.ts
+++ b/src/responses.ts
@@ -1254,8 +1254,8 @@ export async function handleResponses(
});
if (responsesReq.stream !== true) {
const body = buildContentWithToolCallsResponse(
- response.content,
- response.toolCalls,
+ response.content ?? "",
+ response.toolCalls ?? [],
completionReq.model,
effReasoning,
response.webSearches,
@@ -1266,8 +1266,8 @@ export async function handleResponses(
res.end(JSON.stringify(body));
} else {
const events = buildContentWithToolCallsStreamEvents(
- response.content,
- response.toolCalls,
+ response.content ?? "",
+ response.toolCalls ?? [],
completionReq.model,
chunkSize,
effReasoning,
diff --git a/src/server.ts b/src/server.ts
index 31da8692..2e9bb4b8 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -848,8 +848,8 @@ async function handleCompletions(
});
if (body.stream !== true) {
const completion = buildContentWithToolCallsCompletion(
- response.content,
- response.toolCalls,
+ response.content ?? "",
+ response.toolCalls ?? [],
body.model,
effReasoning,
overrides,
@@ -859,8 +859,8 @@ async function handleCompletions(
res.end(JSON.stringify(completion));
} else {
const chunks = buildContentWithToolCallsChunks(
- response.content,
- response.toolCalls,
+ response.content ?? "",
+ response.toolCalls ?? [],
body.model,
chunkSize,
effReasoning,
@@ -869,7 +869,8 @@ async function handleCompletions(
);
// Build usage chunk for stream_options.include_usage
const completionText =
- response.content + response.toolCalls.map((tc) => tc.name + tc.arguments).join("");
+ (response.content ?? "") +
+ (response.toolCalls ?? []).map((tc) => tc.name + tc.arguments).join("");
const usageChunk = includeUsage
? buildUsageChunk(
chunks[0]?.id ?? "chatcmpl-unknown",
diff --git a/src/stream-collapse.ts b/src/stream-collapse.ts
index 1fc007c5..b9a4d292 100644
--- a/src/stream-collapse.ts
+++ b/src/stream-collapse.ts
@@ -998,6 +998,11 @@ export function collapseCohereSSE(body: string): CollapseResult {
// assumes real provider indices stay below it.
let nextSyntheticIndex = 1_000_000;
let lastStartKey: number | undefined;
+ // Cross-channel order atoms (#274), in stream arrival order. Cohere v2 SSE is
+ // a single ordered stream (content-delta / tool-call-start interleave on the
+ // wire), so a toolCall atom references the same accumulator object stored in
+ // toolCallMap and later tool-call-delta fragments mutate it in place.
+ const orderAtoms: OrderAtom[] = [];
for (const block of blocks) {
const lines = splitSSELines(block);
@@ -1028,6 +1033,11 @@ export function collapseCohereSSE(body: string): CollapseResult {
reasoning += contentObj.thinking;
} else if (contentObj && typeof contentObj.text === "string") {
content += contentObj.text;
+ // Reasoning (`thinking`) is intentionally NOT pushed as an order atom —
+ // it is not a block channel (mirrors the other collapsers).
+ if (contentObj.text.length > 0) {
+ orderAtoms.push({ kind: "text", text: contentObj.text });
+ }
}
}
@@ -1046,11 +1056,15 @@ export function collapseCohereSSE(body: string): CollapseResult {
const toolCalls = message?.tool_calls as Record | undefined;
if (toolCalls) {
const fn = toolCalls.function as Record | undefined;
- toolCallMap.set(index, {
+ const created = {
id: (toolCalls.id as string) ?? "",
name: (fn?.name as string) ?? "",
arguments: "",
- });
+ };
+ toolCallMap.set(index, created);
+ // Record the tool atom at the position its tool-call-start arrived; it
+ // references `created` so later tool-call-delta args fill it in place.
+ orderAtoms.push({ kind: "toolCall", ref: created });
}
}
@@ -1085,14 +1099,33 @@ export function collapseCohereSSE(body: string): CollapseResult {
}
if (toolCallMap.size > 0) {
- const sorted = Array.from(toolCallMap.entries()).sort(([a], [b]) => a - b);
- return {
- ...(content ? { content } : {}),
- toolCalls: sorted.map(([, tc]) => ({
+ const orderedBlocks = buildOrderedBlocks(orderAtoms);
+ // When interleaved, persist ordered `blocks` and derive the flat `toolCalls`
+ // from the SAME tool atoms (stream-arrival order) so the two never disagree
+ // (#274 F4/F5). Otherwise keep the legacy index-sorted order so non-
+ // interleaved fixtures stay byte-identical. Both paths normalize arguments
+ // so a zero-arg call persists "{}" rather than "".
+ const orderedToolCalls = orderAtoms
+ .filter(
+ (a): a is { kind: "toolCall"; ref: { name: string; arguments: string; id?: string } } =>
+ a.kind === "toolCall",
+ )
+ .map((a) => ({
+ name: a.ref.name,
+ arguments: normalizeToolArguments(a.ref.arguments),
+ ...(a.ref.id ? { id: a.ref.id } : {}),
+ }));
+ const indexSortedToolCalls = Array.from(toolCallMap.entries())
+ .sort(([a], [b]) => a - b)
+ .map(([, tc]) => ({
name: tc.name,
- arguments: tc.arguments,
+ arguments: normalizeToolArguments(tc.arguments),
...(tc.id ? { id: tc.id } : {}),
- })),
+ }));
+ return {
+ ...(orderedBlocks ? { blocks: orderedBlocks } : {}),
+ ...(content ? { content } : {}),
+ toolCalls: orderedBlocks ? orderedToolCalls : indexSortedToolCalls,
...(reasoning ? { reasoning } : {}),
...(droppedChunks > 0 ? { droppedChunks } : {}),
...(firstDroppedSample ? { firstDroppedSample } : {}),
@@ -1241,6 +1274,11 @@ export function collapseBedrockEventStream(body: Buffer): CollapseResult {
let droppedChunks = 0;
let firstDroppedSample: string | undefined;
const toolCallMap = new Map();
+ // Cross-channel order atoms (#274), in frame (stream) arrival order. Both the
+ // Anthropic-native and Converse sub-protocols append text and tool deltas in
+ // frame order, so a toolCall atom references the same accumulator object stored
+ // in toolCallMap and later arg deltas mutate it in place.
+ const orderAtoms: OrderAtom[] = [];
for (const frame of frames) {
const frameStr = frame.payload.toString("utf8");
@@ -1261,6 +1299,9 @@ export function collapseBedrockEventStream(body: Buffer): CollapseResult {
const delta = parsed.delta as Record | undefined;
if (delta?.type === "text_delta" && typeof delta.text === "string") {
content += delta.text;
+ if (delta.text.length > 0) {
+ orderAtoms.push({ kind: "text", text: delta.text });
+ }
}
if (delta?.type === "thinking_delta" && typeof delta.thinking === "string") {
reasoning += delta.thinking;
@@ -1303,11 +1344,13 @@ export function collapseBedrockEventStream(body: Buffer): CollapseResult {
redactedThinking.push(redactedData);
}
if (block?.type === "tool_use" && index !== undefined) {
- toolCallMap.set(index, {
+ const created = {
id: (block.id as string) ?? "",
name: (block.name as string) ?? "",
arguments: "",
- });
+ };
+ toolCallMap.set(index, created);
+ orderAtoms.push({ kind: "toolCall", ref: created });
}
continue;
}
@@ -1322,11 +1365,13 @@ export function collapseBedrockEventStream(body: Buffer): CollapseResult {
const start = blockStart.start as Record | undefined;
if (start?.toolUse && index !== undefined) {
const toolUse = start.toolUse as Record;
- toolCallMap.set(index, {
+ const created = {
id: (toolUse.toolUseId as string) ?? "",
name: (toolUse.name as string) ?? "",
arguments: "",
- });
+ };
+ toolCallMap.set(index, created);
+ orderAtoms.push({ kind: "toolCall", ref: created });
}
}
@@ -1342,6 +1387,9 @@ export function collapseBedrockEventStream(body: Buffer): CollapseResult {
// Text delta
if (typeof delta.text === "string") {
content += delta.text;
+ if (delta.text.length > 0) {
+ orderAtoms.push({ kind: "text", text: delta.text });
+ }
}
// Reasoning delta — Converse carries reasoning in `reasoningContent.text`.
@@ -1381,13 +1429,37 @@ export function collapseBedrockEventStream(body: Buffer): CollapseResult {
}
if (toolCallMap.size > 0) {
- const sorted = Array.from(toolCallMap.entries()).sort(([a], [b]) => a - b);
- return {
- toolCalls: sorted.map(([, tc]) => ({
+ const orderedBlocks = buildOrderedBlocks(orderAtoms);
+ // When interleaved, persist ordered `blocks` and derive the flat `toolCalls`
+ // from the SAME tool atoms (frame-arrival order) so the two never disagree
+ // (#274 F4/F5); otherwise keep the legacy index-sorted order so non-
+ // interleaved fixtures stay byte-identical. Both paths normalize arguments
+ // so a zero-arg tool_use persists "{}" rather than "".
+ const orderedToolCalls = orderAtoms
+ .filter(
+ (a): a is { kind: "toolCall"; ref: { name: string; arguments: string; id?: string } } =>
+ a.kind === "toolCall",
+ )
+ .map((a) => ({
+ name: a.ref.name,
+ arguments: normalizeToolArguments(a.ref.arguments),
+ ...(a.ref.id ? { id: a.ref.id } : {}),
+ }));
+ const indexSortedToolCalls = Array.from(toolCallMap.entries())
+ .sort(([a], [b]) => a - b)
+ .map(([, tc]) => ({
name: tc.name,
- arguments: tc.arguments,
+ arguments: normalizeToolArguments(tc.arguments),
...(tc.id ? { id: tc.id } : {}),
- })),
+ }));
+ return {
+ ...(orderedBlocks ? { blocks: orderedBlocks } : {}),
+ // A tool-bearing turn that ALSO streamed text previously DROPPED `content`
+ // here. Spread it when present so the interleaved blocks are persistable
+ // (the recorder only emits blocks when content+toolCalls coexist) and the
+ // text is no longer silently lost (#274).
+ ...(content ? { content } : {}),
+ toolCalls: orderedBlocks ? orderedToolCalls : indexSortedToolCalls,
...(reasoning ? { reasoning } : {}),
...(reasoningSignature ? { reasoningSignature } : {}),
...(redactedThinking.length > 0 ? { redactedThinking } : {}),
@@ -1508,12 +1580,13 @@ export function collapseGeminiInteractionsSSE(body: string): CollapseResult {
}
}
} else if (delta.type === "function_call") {
- // Legacy 1.x — full tool call inline in a content.delta.
+ // Legacy 1.x — full tool call inline in a content.delta. Normalize the
+ // string branch so a literal `arguments: ""` persists "{}" not "".
toolCalls.push({
name: String(delta.name ?? ""),
arguments:
typeof delta.arguments === "string"
- ? delta.arguments
+ ? normalizeToolArguments(delta.arguments)
: JSON.stringify(delta.arguments ?? {}),
...(delta.id ? { id: String(delta.id) } : {}),
});
@@ -1529,6 +1602,17 @@ export function collapseGeminiInteractionsSSE(body: string): CollapseResult {
}
}
+ // BLOCK-CAPTURE DECISION (#274): this collapser is ARGS-ONLY — it intentionally
+ // does NOT emit `blocks`. Unlike the delta-stream collapsers, the 2.x protocol
+ // is step/index-addressed: tool identity arrives on `step.start` and its args
+ // on indexed `arguments_delta` fragments, finalized here in SORTED step-index
+ // order — interleaved with legacy 1.x calls that are arrival-pushed above. That
+ // hybrid flat order cannot be reconciled with arrival-order OrderAtoms by
+ // identity, so any emitted `blocks` would risk disagreeing with the flat
+ // `toolCalls` (violating the #274 F4/F5 byte-consistency invariant). We
+ // therefore only normalize arguments and leave `blocks` unset. Revisit only if
+ // the finalizer is reworked to a single arrival-ordered pass.
+
// Finalize 2.x tool calls in step-index order.
for (const [, tc] of Array.from(stepToolCalls.entries()).sort(([a], [b]) => a - b)) {
let args: string;
@@ -1537,7 +1621,12 @@ export function collapseGeminiInteractionsSSE(body: string): CollapseResult {
// The arguments_delta fragments must concatenate into valid JSON by
// step.stop. A truncated/interrupted stream can leave them malformed;
// surface that via droppedChunks rather than writing a corrupt fixture
- // silently (mirrors the per-chunk parse guard above).
+ // silently (mirrors the per-chunk parse guard above). On failure we MUST
+ // NOT persist the invalid-JSON string — it would fail `validateFixtures`
+ // on reload (the loader does `JSON.parse(tc.arguments)`). Fall back to
+ // "{}" so the persisted `arguments` is always valid JSON, exactly like
+ // the empty/unusable-args path below (mirrors the OpenAI/Anthropic
+ // sibling collapsers, which never persist unparseable tool args).
try {
JSON.parse(args);
} catch {
@@ -1548,11 +1637,18 @@ export function collapseGeminiInteractionsSSE(body: string): CollapseResult {
200,
)}`;
}
+ args = "{}";
}
} else {
args = typeof tc.argsObj === "string" ? tc.argsObj : JSON.stringify(tc.argsObj ?? {});
}
- toolCalls.push({ name: tc.name, arguments: args, ...(tc.id ? { id: tc.id } : {}) });
+ // Normalize so a step.start with no arguments / a whitespace-only argsStr
+ // persists "{}" rather than "" (a valid-JSON non-empty argsStr is unchanged).
+ toolCalls.push({
+ name: tc.name,
+ arguments: normalizeToolArguments(args),
+ ...(tc.id ? { id: tc.id } : {}),
+ });
}
if (toolCalls.length > 0) {
diff --git a/src/types.ts b/src/types.ts
index 51fae37e..ab8f88ca 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -211,13 +211,25 @@ export interface ToolCallResponse extends ResponseOverrides {
}
export interface ContentWithToolCallsResponse extends ResponseOverrides {
- content: string;
- toolCalls: ToolCall[];
+ /**
+ * Combined-turn text. Required for the legacy `{ content, toolCalls }` shape
+ * and the combined `{ content, toolCalls, blocks }` shape. OPTIONAL only for a
+ * BLOCKS-ONLY fixture (`{ blocks: [...] }` with no `content`/`toolCalls`),
+ * where the ordered `blocks` array is the sole source of streamed output and
+ * the legacy text-first builder branch is never taken. The runtime guard
+ * `isContentWithToolCallsResponse` enforces this: it matches when BOTH
+ * `content` + `toolCalls` are present (legacy/combined) OR when a non-empty
+ * `blocks` array is present (blocks-only).
+ */
+ content?: string;
+ /** See {@link ContentWithToolCallsResponse.content} — optional only for the blocks-only shape. */
+ toolCalls?: ToolCall[];
/**
* Optional ordered streaming blocks. When present, builders stream these in
* array order (tool-first / interleaved); when absent, the legacy
- * `{ content, toolCalls }` text-first path runs unchanged. Purely additive —
- * `isContentWithToolCallsResponse` still requires `content` + `toolCalls`.
+ * `{ content, toolCalls }` text-first path runs unchanged. Purely additive.
+ * A non-empty `blocks` array also makes this a FIRST-CLASS blocks-only
+ * response even without `content`/`toolCalls` (see those fields above).
*/
blocks?: FixtureBlock[];
reasoning?: string;
@@ -475,16 +487,23 @@ export interface FixtureFileTextResponse extends ResponseOverrides {
}
export interface FixtureFileContentWithToolCallsResponse extends ResponseOverrides {
- /** Accepts a JSON object or array (structured output) — the loader will JSON.stringify it. */
- content: string | Record | unknown[];
- toolCalls: FixtureFileToolCall[];
+ /**
+ * Accepts a JSON object or array (structured output) — the loader will
+ * JSON.stringify it. OPTIONAL only for a BLOCKS-ONLY fixture (mirrors
+ * {@link ContentWithToolCallsResponse.content}); required for the
+ * legacy/combined shapes.
+ */
+ content?: string | Record | unknown[];
+ /** See {@link FixtureFileContentWithToolCallsResponse.content} — optional only for the blocks-only shape. */
+ toolCalls?: FixtureFileToolCall[];
/**
* Optional ordered streaming blocks (mirrors the in-memory
* {@link ContentWithToolCallsResponse.blocks}). When present, builders stream
* these in array order (tool-first / interleaved); a `toolCall` block's
* object `arguments` is auto-stringified just like `toolCalls[].arguments`.
* Absent → legacy text-first path runs unchanged. Purely additive. Uses the
- * on-disk {@link FixtureFileBlock} shape with relaxed `arguments`.
+ * on-disk {@link FixtureFileBlock} shape with relaxed `arguments`. A non-empty
+ * `blocks` array alone makes this a first-class blocks-only fixture.
*/
blocks?: FixtureFileBlock[];
reasoning?: string;
diff --git a/src/ws-gemini-live.ts b/src/ws-gemini-live.ts
index 24274cb1..bde73c0e 100644
--- a/src/ws-gemini-live.ts
+++ b/src/ws-gemini-live.ts
@@ -23,6 +23,7 @@ import {
flattenHeaders,
formatToMime,
generateToolCallId,
+ resolveFixtureBlocks,
resolveResponse,
resolveStrictMode,
strictOverrideField,
@@ -513,7 +514,159 @@ async function processMessage(
response: { status: 200, fixture },
});
- const content = response.content;
+ // BLOCKS path (#274): when the fixture carries an ordered `blocks` array,
+ // honor it instead of the legacy `content ?? ""` / `toolCalls ?? []` path.
+ // The Gemini Live WS protocol expresses ordering via SEQUENTIAL messages —
+ // a text block becomes one-or-more `serverContent.modelTurn.parts[{text}]`
+ // messages and a toolCall block becomes a `toolCall.functionCalls` message,
+ // so emitting in array order faithfully reproduces tool-before-text (or any
+ // interleaving), matching the HTTP gemini.ts blocks branch. Without this, a
+ // blocks-only fixture (post-F0 it matches this guard) would stream an EMPTY
+ // payload — a silent drop. Legacy fixtures (no `blocks`) skip this entirely.
+ if (response.blocks && response.blocks.length > 0) {
+ const resolvedBlocks = resolveFixtureBlocks(response.blocks);
+ const interruption = createInterruptionSignal(fixture);
+ const replaySpeed = fixture.replaySpeed ?? defaults.replaySpeed;
+ const { recordedTimings } = fixture;
+ let chunkIndex = 0;
+ let interrupted = false;
+
+ // Accumulate the equivalent assistant turn for conversation history, so a
+ // follow-up turn sees the same content + tool_calls as the legacy path.
+ let historyContent = "";
+ const historyToolCalls: {
+ id: string;
+ type: "function";
+ function: { name: string; arguments: string };
+ }[] = [];
+
+ outer: for (const block of resolvedBlocks) {
+ if (block.type === "toolCall") {
+ if (ws.isClosed) break;
+ const tcDelay = calculateDelay(
+ chunkIndex,
+ undefined,
+ latency,
+ recordedTimings,
+ replaySpeed,
+ );
+ if (tcDelay > 0) await delay(tcDelay, interruption?.signal);
+ if (interruption?.signal.aborted) {
+ interrupted = true;
+ break;
+ }
+ if (ws.isClosed) break;
+
+ const resolvedId = block.id ?? generateToolCallId();
+ let argsObj: Record;
+ try {
+ argsObj = JSON.parse(block.arguments || "{}") as Record;
+ } catch {
+ defaults.logger.warn(
+ `Malformed JSON in fixture tool call arguments for "${block.name}": ${block.arguments}`,
+ );
+ argsObj = {};
+ }
+
+ try {
+ ws.send(
+ JSON.stringify({
+ toolCall: { functionCalls: [{ name: block.name, args: argsObj, id: resolvedId }] },
+ }),
+ );
+ } catch (err) {
+ defaults.logger.debug(
+ "[gemini-live] send failed during blocks streaming, closing",
+ err,
+ );
+ break;
+ }
+ chunkIndex++;
+ historyToolCalls.push({
+ id: resolvedId,
+ type: "function" as const,
+ function: { name: block.name, arguments: block.arguments },
+ });
+ interruption?.tick();
+ if (interruption?.signal.aborted) {
+ interrupted = true;
+ break;
+ }
+ } else {
+ const text = block.text;
+ historyContent += text;
+ if (text.length === 0) {
+ // An empty text block carries no wire content, so emit nothing and
+ // spend no chunk: the old guard sent a useless empty `modelTurn`
+ // message and `continue`d WITHOUT `chunkIndex++`/`interruption.tick()`,
+ // which leaked the next block past `truncateAfterChunks` and shifted
+ // `recordedTimings` indexing for every following block. Skipping
+ // keeps non-empty-block output byte-identical and the chunk/timing
+ // accounting correct.
+ continue;
+ }
+ for (let i = 0; i < text.length; i += chunkSize) {
+ if (ws.isClosed) break outer;
+ const chunkDelay = calculateDelay(
+ chunkIndex,
+ undefined,
+ latency,
+ recordedTimings,
+ replaySpeed,
+ );
+ if (chunkDelay > 0) await delay(chunkDelay, interruption?.signal);
+ if (interruption?.signal.aborted) {
+ interrupted = true;
+ break outer;
+ }
+ if (ws.isClosed) break outer;
+ try {
+ ws.send(
+ JSON.stringify({
+ serverContent: { modelTurn: { parts: [{ text: text.slice(i, i + chunkSize) }] } },
+ }),
+ );
+ } catch (err) {
+ defaults.logger.debug(
+ "[gemini-live] send failed during blocks streaming, closing",
+ err,
+ );
+ break outer;
+ }
+ chunkIndex++;
+ interruption?.tick();
+ if (interruption?.signal.aborted) {
+ interrupted = true;
+ break outer;
+ }
+ }
+ }
+ }
+
+ if (interrupted) {
+ ws.destroy();
+ journalEntry.response.interrupted = true;
+ journalEntry.response.interruptReason = interruption?.reason();
+ interruption?.cleanup();
+ return;
+ }
+
+ interruption?.cleanup();
+
+ // Send turnComplete
+ if (!ws.isClosed) {
+ ws.send(JSON.stringify({ serverContent: { turnComplete: true } }));
+ }
+
+ session.conversationHistory.push({
+ role: "assistant",
+ content: historyContent || null,
+ ...(historyToolCalls.length > 0 ? { tool_calls: historyToolCalls } : {}),
+ });
+ return;
+ }
+
+ const content = response.content ?? "";
const chunkList: string[] = [];
for (let i = 0; i < content.length; i += chunkSize) {
chunkList.push(content.slice(i, i + chunkSize));
@@ -575,7 +728,7 @@ async function processMessage(
}
// Pre-compute tool calls with stable IDs so wire message and history match
- const resolvedToolCalls = response.toolCalls.map((tc) => ({
+ const resolvedToolCalls = (response.toolCalls ?? []).map((tc) => ({
...tc,
resolvedId: tc.id ?? generateToolCallId(),
}));
@@ -782,7 +935,7 @@ async function processMessage(
}
// Pre-compute tool calls with stable IDs so wire message and history match
- const resolvedToolCalls = response.toolCalls.map((tc) => ({
+ const resolvedToolCalls = (response.toolCalls ?? []).map((tc) => ({
...tc,
resolvedId: tc.id ?? generateToolCallId(),
}));
diff --git a/src/ws-realtime.ts b/src/ws-realtime.ts
index b0ce2d7f..4b1ad1e3 100644
--- a/src/ws-realtime.ts
+++ b/src/ws-realtime.ts
@@ -7,7 +7,13 @@
*/
import { randomBytes } from "node:crypto";
-import type { ChatCompletionRequest, ChatMessage, Fixture } from "./types.js";
+import type {
+ ChatCompletionRequest,
+ ChatMessage,
+ Fixture,
+ FixtureBlock,
+ JournalEntry,
+} from "./types.js";
import { matchFixtureDiagnostic } from "./router.js";
import {
generateToolCallId,
@@ -16,6 +22,7 @@ import {
isToolCallResponse,
isContentWithToolCallsResponse,
isErrorResponse,
+ resolveFixtureBlocks,
resolveResponse,
resolveStrictMode,
strictOverrideField,
@@ -851,6 +858,31 @@ async function handleResponseCreate(
response: { status: 200, fixture },
});
+ // ── Ordered blocks path (#274) ──────────────────────────────────
+ // When the fixture supplies an ordered `blocks` array, stream the items in
+ // array order so a blocks-only fixture (no `content`/`toolCalls`) never
+ // emits an empty payload, and a combined `{content,toolCalls,blocks}`
+ // fixture preserves block ordering vs the HTTP path. Unlike OpenAI
+ // chat-completions (separate, non-positional content/tool channels), the
+ // Realtime WS protocol sequences output items on the wire with explicit
+ // `output_index`, so block order — including tool-before-text — IS
+ // observable to a client and is honored here.
+ if (response.blocks && response.blocks.length > 0) {
+ await streamRealtimeBlocks(
+ ws,
+ resolveFixtureBlocks(response.blocks),
+ responseId,
+ fixture,
+ defaults,
+ latency,
+ chunkSize,
+ isBeta,
+ conversationItems,
+ journalEntry,
+ );
+ return;
+ }
+
// response.created
sendEvent(
ws,
@@ -923,7 +955,7 @@ async function handleResponseCreate(
);
// response.output_text.delta (chunked) — GA name
- const content = response.content;
+ const content = response.content ?? "";
const replaySpeed = fixture.replaySpeed ?? defaults.replaySpeed;
const { recordedTimings } = fixture;
let eventIndex = 0;
@@ -1056,8 +1088,9 @@ async function handleResponseCreate(
allOutputItems.push(textOutputItem);
// ── Tool call parts ────────────────────────────────────────────
- for (let tcIdx = 0; tcIdx < response.toolCalls.length; tcIdx++) {
- const tc = response.toolCalls[tcIdx];
+ const toolCalls = response.toolCalls ?? [];
+ for (let tcIdx = 0; tcIdx < toolCalls.length; tcIdx++) {
+ const tc = toolCalls[tcIdx];
const callId = tc.id ?? generateToolCallId();
const itemId = realtimeId("item");
const outputIndex = tcIdx + 1; // offset by 1 for the text item
@@ -1307,7 +1340,7 @@ async function handleResponseCreate(
);
// response.output_text.delta (chunked) — GA name
- const content = response.content;
+ const content = response.content ?? "";
const replaySpeed = fixture.replaySpeed ?? defaults.replaySpeed;
const { recordedTimings } = fixture;
const interruption = createInterruptionSignal(fixture);
@@ -1667,3 +1700,382 @@ async function handleResponseCreate(
"server_error",
);
}
+
+/**
+ * Stream a content+toolCalls fixture's ordered `blocks` over the Realtime WS
+ * surface (#274). Each block becomes its own output item, sequenced on the wire
+ * with an incrementing `output_index` so a client observes the blocks in array
+ * order (text and tool-call items can interleave in any order, unlike the
+ * separate channels of chat-completions). Emits the same per-item event shapes
+ * as the legacy text/tool branches; a blocks-only fixture therefore never
+ * produces an empty payload.
+ *
+ * Per text block the sequence is `response.output_item.added` →
+ * `response.content_part.added` → chunked `response.output_text.delta` →
+ * `response.output_text.done` → `response.content_part.done` →
+ * `response.output_item.done` → `conversation.item.done`. Per tool-call block
+ * it is `response.output_item.added` → chunked
+ * `response.function_call_arguments.delta` →
+ * `response.function_call_arguments.done` → `response.output_item.done` →
+ * `conversation.item.done`. An empty text or empty `arguments` string emits no
+ * delta events and consumes no delay slot.
+ *
+ * @param ws - Connection the events are sent on. `isClosed` is polled between
+ *   events, and the socket is destroyed when the interruption signal aborts.
+ * @param blocks - Ordered blocks to stream; a block's array index is used as
+ *   its `output_index`.
+ * @param responseId - Id placed on `response.created`, every per-item
+ *   `response.*` event, and `response.done`.
+ * @param fixture - Supplies `replaySpeed` and `recordedTimings` for delay
+ *   calculation and is handed to `createInterruptionSignal`.
+ * @param defaults - Fallback `replaySpeed` (used when the fixture has none)
+ *   and the `logger` used to report delta send failures at debug level.
+ * @param latency - Forwarded to `calculateDelay` for every delta chunk.
+ * @param chunkSize - Number of characters per text / argument delta.
+ * @param isBeta - Forwarded unchanged to every `sendEvent` call
+ *   (NOTE(review): presumably selects beta vs GA event naming — confirm
+ *   against `sendEvent`).
+ * @param conversationItems - Receives one entry per completed output item, in
+ *   order, after `response.done` has been sent. Left untouched when the stream
+ *   is interrupted or the socket closes first.
+ * @param journalEntry - `response.interrupted` / `response.interruptReason`
+ *   are set on it when the interruption signal aborts the stream.
+ * @returns Resolves once streaming finished, was interrupted, or the socket
+ *   closed.
+ */
+async function streamRealtimeBlocks(
+ ws: WebSocketConnection,
+ blocks: FixtureBlock[],
+ responseId: string,
+ fixture: Fixture,
+ defaults: { latency: number; chunkSize: number; replaySpeed?: number; logger: Logger },
+ latency: number,
+ chunkSize: number,
+ isBeta: boolean,
+ conversationItems: RealtimeItem[],
+ journalEntry: JournalEntry,
+): Promise {
+ // response.created
+ sendEvent(
+ ws,
+ {
+ type: "response.created",
+ response: {
+ id: responseId,
+ object: "realtime.response",
+ status: "in_progress",
+ status_details: null,
+ output: [],
+ usage: null,
+ },
+ },
+ isBeta,
+ );
+
+ const interruption = createInterruptionSignal(fixture);
+ let interrupted = false;
+ const allOutputItems: unknown[] = [];
+ const replaySpeed = fixture.replaySpeed ?? defaults.replaySpeed;
+ const { recordedTimings } = fixture;
+ let eventIndex = 0;
+
+ // Every text block is tagged "commentary" when the array contains any tool-call
+ // block (regardless of position); with no tool calls, text is the "final_answer".
+ const hasToolCalls = blocks.some((b) => b.type === "toolCall");
+
+ for (let outputIndex = 0; outputIndex < blocks.length; outputIndex++) {
+ if (ws.isClosed || interrupted) break;
+ const block = blocks[outputIndex];
+
+ if (block.type === "text") {
+ const textItemId = realtimeId("item");
+ const contentIndex = 0;
+ const textPhase = hasToolCalls ? "commentary" : "final_answer";
+ const text = block.text;
+
+ const textOutputItem = {
+ id: textItemId,
+ type: "message",
+ role: "assistant",
+ status: "completed",
+ content: [{ type: "output_text", text }],
+ };
+
+ sendEvent(
+ ws,
+ {
+ type: "response.output_item.added",
+ response_id: responseId,
+ output_index: outputIndex,
+ item: {
+ id: textItemId,
+ type: "message",
+ role: "assistant",
+ status: "in_progress",
+ content: [],
+ phase: textPhase,
+ },
+ },
+ isBeta,
+ );
+
+ sendEvent(
+ ws,
+ {
+ type: "response.content_part.added",
+ response_id: responseId,
+ item_id: textItemId,
+ output_index: outputIndex,
+ content_index: contentIndex,
+ part: { type: "output_text", text: "" },
+ },
+ isBeta,
+ );
+
+ for (let i = 0; i < text.length; i += chunkSize) {
+ if (ws.isClosed) break;
+ const chunkDelay = calculateDelay(
+ eventIndex,
+ undefined,
+ latency,
+ recordedTimings,
+ replaySpeed,
+ );
+ if (chunkDelay > 0) await delay(chunkDelay, interruption?.signal);
+ if (interruption?.signal.aborted) {
+ interrupted = true;
+ break;
+ }
+ if (ws.isClosed) break;
+ const chunk = text.slice(i, i + chunkSize);
+ try {
+ sendEvent(
+ ws,
+ {
+ type: "response.output_text.delta",
+ response_id: responseId,
+ item_id: textItemId,
+ output_index: outputIndex,
+ content_index: contentIndex,
+ delta: chunk,
+ },
+ isBeta,
+ );
+ } catch (err) {
+ defaults.logger.debug(
+ "[ws-realtime] send failed during block text streaming, closing",
+ err,
+ );
+ break;
+ }
+ eventIndex++;
+ interruption?.tick();
+ if (interruption?.signal.aborted) {
+ interrupted = true;
+ break;
+ }
+ }
+
+ if (interrupted || ws.isClosed) break;
+
+ sendEvent(
+ ws,
+ {
+ type: "response.output_text.done",
+ response_id: responseId,
+ item_id: textItemId,
+ output_index: outputIndex,
+ content_index: contentIndex,
+ text,
+ },
+ isBeta,
+ );
+
+ if (ws.isClosed) break;
+
+ sendEvent(
+ ws,
+ {
+ type: "response.content_part.done",
+ response_id: responseId,
+ item_id: textItemId,
+ output_index: outputIndex,
+ content_index: contentIndex,
+ part: { type: "output_text", text },
+ },
+ isBeta,
+ );
+
+ if (ws.isClosed) break;
+
+ sendEvent(
+ ws,
+ {
+ type: "response.output_item.done",
+ response_id: responseId,
+ output_index: outputIndex,
+ item: { ...textOutputItem, phase: textPhase },
+ },
+ isBeta,
+ );
+
+ sendEvent(
+ ws,
+ {
+ type: "conversation.item.done",
+ item: {
+ id: textItemId,
+ object: "realtime.item",
+ type: "message",
+ role: "assistant",
+ status: "completed",
+ content: textOutputItem.content,
+ },
+ },
+ isBeta,
+ );
+
+ if (ws.isClosed) break;
+ allOutputItems.push(textOutputItem);
+ } else {
+ const callId = block.id ?? generateToolCallId();
+ const itemId = realtimeId("item");
+ const args = block.arguments;
+
+ const toolOutputItem = {
+ id: itemId,
+ type: "function_call",
+ status: "completed",
+ call_id: callId,
+ name: block.name,
+ arguments: args,
+ };
+
+ sendEvent(
+ ws,
+ {
+ type: "response.output_item.added",
+ response_id: responseId,
+ output_index: outputIndex,
+ item: {
+ id: itemId,
+ type: "function_call",
+ status: "in_progress",
+ call_id: callId,
+ name: block.name,
+ arguments: "",
+ phase: "final_answer",
+ },
+ },
+ isBeta,
+ );
+
+ for (let i = 0; i < args.length; i += chunkSize) {
+ if (ws.isClosed) break;
+ const chunkDelay = calculateDelay(
+ eventIndex,
+ undefined,
+ latency,
+ recordedTimings,
+ replaySpeed,
+ );
+ if (chunkDelay > 0) await delay(chunkDelay, interruption?.signal);
+ if (interruption?.signal.aborted) {
+ interrupted = true;
+ break;
+ }
+ if (ws.isClosed) break;
+ const chunk = args.slice(i, i + chunkSize);
+ try {
+ sendEvent(
+ ws,
+ {
+ type: "response.function_call_arguments.delta",
+ response_id: responseId,
+ item_id: itemId,
+ output_index: outputIndex,
+ call_id: callId,
+ delta: chunk,
+ },
+ isBeta,
+ );
+ } catch (err) {
+ defaults.logger.debug(
+ "[ws-realtime] send failed during block tool call streaming, closing",
+ err,
+ );
+ break;
+ }
+ eventIndex++;
+ interruption?.tick();
+ if (interruption?.signal.aborted) {
+ interrupted = true;
+ break;
+ }
+ }
+
+ if (interrupted || ws.isClosed) break;
+
+ sendEvent(
+ ws,
+ {
+ type: "response.function_call_arguments.done",
+ response_id: responseId,
+ item_id: itemId,
+ output_index: outputIndex,
+ call_id: callId,
+ arguments: args,
+ },
+ isBeta,
+ );
+
+ if (ws.isClosed) break;
+
+ sendEvent(
+ ws,
+ {
+ type: "response.output_item.done",
+ response_id: responseId,
+ output_index: outputIndex,
+ item: { ...toolOutputItem, phase: "final_answer" },
+ },
+ isBeta,
+ );
+
+ sendEvent(
+ ws,
+ {
+ type: "conversation.item.done",
+ item: {
+ id: itemId,
+ object: "realtime.item",
+ type: "function_call",
+ status: "completed",
+ call_id: callId,
+ name: block.name,
+ arguments: args,
+ },
+ },
+ isBeta,
+ );
+
+ if (ws.isClosed) break;
+ allOutputItems.push(toolOutputItem);
+ }
+ }
+
+ if (interrupted) {
+ ws.destroy();
+ journalEntry.response.interrupted = true;
+ journalEntry.response.interruptReason = interruption?.reason();
+ interruption?.cleanup();
+ return;
+ }
+
+ interruption?.cleanup();
+
+ if (ws.isClosed) return;
+
+ // response.done
+ sendEvent(
+ ws,
+ {
+ type: "response.done",
+ response: {
+ id: responseId,
+ object: "realtime.response",
+ status: "completed",
+ output: allOutputItems,
+ usage: { total_tokens: 0, input_tokens: 0, output_tokens: 0 },
+ },
+ },
+ isBeta,
+ );
+
+ // Accumulate into conversation for multi-turn (each output item, in order).
+ for (const item of allOutputItems) {
+ const it = item as { type: string; id: string; content?: unknown };
+ if (it.type === "message") {
+ conversationItems.push({
+ type: "message",
+ id: it.id,
+ role: "assistant",
+ content: [
+ {
+ type: "text",
+ text: ((it.content as Array<{ text?: string }>)?.[0]?.text ?? "") as string,
+ },
+ ],
+ });
+ } else {
+ conversationItems.push(item as RealtimeItem);
+ }
+ }
+}
diff --git a/src/ws-responses.ts b/src/ws-responses.ts
index c1e0f965..acef2f13 100644
--- a/src/ws-responses.ts
+++ b/src/ws-responses.ts
@@ -270,8 +270,8 @@ async function processMessage(
});
const events = buildContentWithToolCallsStreamEvents(
- response.content,
- response.toolCalls,
+ response.content ?? "",
+ response.toolCalls ?? [],
completionReq.model,
chunkSize,
resolveReasoningForModel(