// Source: CopilotKit/showcase/integrations/langgraph-python/tests/e2e/tool-rendering-default-catchall.spec.ts (main · samuelson-chen/CopilotKit)

import { test, expect } from "@playwright/test";

// QA reference: qa/tool-rendering-default-catchall.md
// Demo source: src/app/demos/tool-rendering-default-catchall/page.tsx
//
// This cell registers ZERO custom render hooks. The runtime falls back
// to the framework's built-in DefaultToolCallRenderer, which paints
// every tool call with a stable `[data-testid="copilot-tool-render"]`
// wrapper plus a `data-tool-name="<name>"` attribute. We assert on the
// built-in contract — branded testids from sibling cells stay at zero.

/** Max wait (ms) used when waiting for the composer and suggestion pills. */
const SUGGESTION_TIMEOUT = 15_000;

/** Max wait (ms) used when waiting for tool-call cards and their attributes. */
const TOOL_TIMEOUT = 60_000;

/** Titles of the suggestion pills this spec expects on the page. */
const PILLS = [
  "Weather in SF",
  "Find flights",
  "Roll a d20",
  "Chain tools",
];

test.describe("Tool Rendering — Default Catch-all", () => {
  // Each test starts on the demo page and only proceeds once the chat
  // composer input is visible.
  test.beforeEach(async ({ page }) => {
    await page.goto("/demos/tool-rendering-default-catchall");

    const composer = page.getByPlaceholder("Type a message");
    await expect(composer).toBeVisible({ timeout: SUGGESTION_TIMEOUT });
  });

  test("page loads with composer and 4 suggestion pills", async ({ page }) => {
    // Every expected pill title must show up among the suggestion elements.
    const pills = page.locator('[data-testid="copilot-suggestion"]');
    for (const label of PILLS) {
      const pill = pills.filter({ hasText: label }).first();
      await expect(pill).toBeVisible({ timeout: SUGGESTION_TIMEOUT });
    }

    // Sanity: branded sibling-cell testids stay at zero on this cell.
    const brandedTestIds = [
      "weather-card",
      "flights-card",
      "stock-card",
      "d20-card",
      "custom-wildcard-card",
    ];
    for (const testId of brandedTestIds) {
      const branded = page.locator(`[data-testid="${testId}"]`);
      await expect(branded).toHaveCount(0);
    }
  });

  test("Weather in SF pill paints the built-in default card for get_weather", async ({
    page,
  }) => {
    const weatherPill = page
      .locator('[data-testid="copilot-suggestion"]')
      .filter({ hasText: "Weather in SF" })
      .first();
    await weatherPill.click();

    // The default renderer tags its wrapper with the tool name.
    const weatherCard = page
      .locator(
        '[data-testid="copilot-tool-render"][data-tool-name="get_weather"]',
      )
      .first();
    await expect(weatherCard).toBeVisible({ timeout: TOOL_TIMEOUT });

    // Args are pinned to San Francisco (verbatim pill prompt → fixture).
    // Polled because the attribute is re-read until it contains the city.
    const readArgs = async () => weatherCard.getAttribute("data-args");
    await expect
      .poll(readArgs, { timeout: TOOL_TIMEOUT })
      .toContain("San Francisco");

    // No branded sibling-cell card mounted.
    for (const testId of ["weather-card", "custom-wildcard-card"]) {
      const branded = page.locator(`[data-testid="${testId}"]`);
      await expect(branded).toHaveCount(0);
    }
  });

  test("Find flights pill paints the built-in default card for search_flights", async ({
    page,
  }) => {
    const flightsPill = page
      .locator('[data-testid="copilot-suggestion"]')
      .filter({ hasText: "Find flights" })
      .first();
    await flightsPill.click();

    const flightsCard = page
      .locator(
        '[data-testid="copilot-tool-render"][data-tool-name="search_flights"]',
      )
      .first();
    await expect(flightsCard).toBeVisible({ timeout: TOOL_TIMEOUT });

    // Result attribute carries the deterministic fixture flights (NOT
    // the a2ui beautiful-chat shape). Polled until one of the expected
    // airline names appears in it.
    const readResult = async () => flightsCard.getAttribute("data-result");
    await expect
      .poll(readResult, { timeout: TOOL_TIMEOUT })
      .toMatch(/United|Delta|JetBlue/);

    // The branded flights card from sibling cells must not be mounted here.
    const brandedFlights = page.locator('[data-testid="flights-card"]');
    await expect(brandedFlights).toHaveCount(0);
  });

  test("Roll a d20 pill paints exactly 5 default cards for roll_d20", async ({
    page,
  }) => {
    // Matches a serialized result whose `value` or `result` field is 20.
    const rolledTwenty = /"value":\s*20|"result":\s*20/;

    await page
      .locator('[data-testid="copilot-suggestion"]')
      .filter({ hasText: "Roll a d20" })
      .first()
      .click();

    const cards = page.locator(
      '[data-testid="copilot-tool-render"][data-tool-name="roll_d20"]',
    );

    await expect
      .poll(async () => cards.count(), { timeout: TOOL_TIMEOUT })
      .toBe(5);

    // 5th card's result must contain "20" (the final scripted roll).
    // Polled rather than read once: the card can be in the DOM before its
    // `data-result` attribute is populated, so a single read right after
    // the count reaches 5 is racy. This mirrors the sequential-pills test.
    await expect
      .poll(async () => cards.nth(4).getAttribute("data-result"), {
        timeout: TOOL_TIMEOUT,
      })
      .toMatch(rolledTwenty);

    // First 4 results are not-20.
    for (let i = 0; i < 4; i++) {
      const r = (await cards.nth(i).getAttribute("data-result")) ?? "";
      expect(r).not.toMatch(rolledTwenty);
    }

    // The branded d20 card from sibling cells must not be mounted here.
    await expect(page.locator('[data-testid="d20-card"]')).toHaveCount(0);
  });

  test("Chain tools pill paints 3 default cards (weather + flights + d20)", async ({
    page,
  }) => {
    const chainPill = page
      .locator('[data-testid="copilot-suggestion"]')
      .filter({ hasText: "Chain tools" })
      .first();
    await chainPill.click();

    // One default-renderer card per chained tool, checked in this order.
    const chainedTools = ["get_weather", "search_flights", "roll_d20"];
    for (const toolName of chainedTools) {
      const card = page
        .locator(
          `[data-testid="copilot-tool-render"][data-tool-name="${toolName}"]`,
        )
        .first();
      await expect(card).toBeVisible({ timeout: TOOL_TIMEOUT });
    }
  });

  // Regression for the aimock multi-pill bug:
  // The d20 and Chain-tools fixtures used `turnIndex` + `hasToolResult` to
  // disambiguate sequential iterations of the same prompt. Those gates
  // count *global* thread state: clicking Find flights first left two
  // assistant messages and one tool message behind, so the d20 loop
  // entered at `turnIndex=2` (skipping rolls 7 and 14, hence only 3
  // cards), and the Chain-tools tool-emitting fixture was skipped
  // entirely (`hasToolResult: false` failed) so the pill went straight to
  // the "Done — Tokyo is sunny…" content with no tool cards. Fix: chain
  // all follow-ups via `toolCallId`, drop the global gates. This test
  // drives the three offending pills (Find flights → Roll a d20 → Chain
  // tools) in a single thread and asserts that each one still renders its
  // tool cards.
  test("sequential pills in one thread render full card sequences for each", async ({
    page,
  }) => {
    // Three sequential pills × multi-tool chains × LLM-mock latency easily
    // exceeds Playwright's 30s default. Bumped to cover the worst case.
    test.setTimeout(240_000);

    // Pill 1: Find flights — seeds the thread with prior assistant/tool
    // messages, which is what used to confuse the later fixtures.
    await page
      .locator('[data-testid="copilot-suggestion"]')
      .filter({ hasText: "Find flights" })
      .first()
      .click();
    const flights = page.locator(
      '[data-testid="copilot-tool-render"][data-tool-name="search_flights"]',
    );
    await expect(flights).toHaveCount(1, { timeout: TOOL_TIMEOUT });

    // Pill 2: Roll a d20 — must still produce all 5 scripted rolls.
    await page
      .locator('[data-testid="copilot-suggestion"]')
      .filter({ hasText: "Roll a d20" })
      .first()
      .click();
    const d20 = page.locator(
      '[data-testid="copilot-tool-render"][data-tool-name="roll_d20"]',
    );
    await expect
      .poll(async () => d20.count(), { timeout: TOOL_TIMEOUT })
      .toBe(5);
    // Final scripted roll lands the 20 — proves the chain advanced through
    // all 5 fixtures, not just the first two before bailing to content.
    await expect
      .poll(async () => d20.nth(4).getAttribute("data-result"), {
        timeout: TOOL_TIMEOUT,
      })
      .toMatch(/"value":\s*20|"result":\s*20/);
    await expect(page.getByText("Rolled the d20 five times")).toBeVisible({
      timeout: TOOL_TIMEOUT,
    });

    // Pill 3: Chain tools — previously skipped its tool-emitting fixture
    // and rendered no cards at all. Neither earlier pill in this thread is
    // expected to render a `get_weather` card, so one appearing now shows
    // the chain's tool calls were emitted. Exact per-tool counts are
    // deliberately not pinned here.
    await page
      .locator('[data-testid="copilot-suggestion"]')
      .filter({ hasText: "Chain tools" })
      .first()
      .click();
    const weather = page.locator(
      '[data-testid="copilot-tool-render"][data-tool-name="get_weather"]',
    );
    await expect(weather.first()).toBeVisible({ timeout: TOOL_TIMEOUT });
  });

  test("every rendered card matches the built-in default-renderer DOM signature", async ({
    page,
  }) => {
    // Drive a single pill that produces a single card so the assertions
    // here are scoped to the exact DOM the framework's default renderer
    // produces.
    const weatherPill = page
      .locator('[data-testid="copilot-suggestion"]')
      .filter({ hasText: "Weather in SF" })
      .first();
    await weatherPill.click();

    const firstCard = page
      .locator('[data-testid="copilot-tool-render"]')
      .first();
    await expect(firstCard).toBeVisible({ timeout: TOOL_TIMEOUT });

    // The built-in default renderer always exposes name + status pill.
    const innerTestIds = [
      "copilot-tool-render-name",
      "copilot-tool-render-status",
    ];
    for (const innerTestId of innerTestIds) {
      const inner = firstCard.locator(`[data-testid="${innerTestId}"]`);
      await expect(inner).toBeVisible({ timeout: TOOL_TIMEOUT });
    }

    // Every card on the page shares the same wrapper testid count as
    // the inner-name and inner-status testids — proves the built-in
    // shell is what's painting (no per-tool shells).
    const wrapperCount = await page
      .locator('[data-testid="copilot-tool-render"]')
      .count();
    for (const innerTestId of innerTestIds) {
      const inners = page.locator(`[data-testid="${innerTestId}"]`);
      await expect(inners).toHaveCount(wrapperCount);
    }
  });
});