Skip to content

Commit 0f6942c

Browse files
committed
fix(showcase/ag2): multimodal content flattener + attachment pipeline
Rewrite the multimodal demo's message converter from a legacy-binary-shape rewriter to a content flattener. AG2's AGUIStream validates message content as a plain string and rejects arrays of content parts with a 400. The new ContentFlattenerShim extracts text from multipart user messages before the AG-UI run dispatches them to the AG2 backend. Also adds defensive validation to sample-attachment-buttons: magic-byte checks, LFS pointer detection, and actionable error messages.
1 parent 0aef10f commit 0f6942c

2 files changed

Lines changed: 195 additions & 15 deletions

File tree

showcase/integrations/ag2/src/app/demos/multimodal/page.tsx

Lines changed: 138 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,23 +8,68 @@
88
* same pipeline the paperclip button uses.
99
*
1010
* Architecture:
11-
* - Dedicated runtime route at `/api/copilotkit-multimodal`. The
12-
* vision-capable model (gpt-4o) lives in src/agents/multimodal_agent.py.
13-
* - Sample files live under `/public/demo-files/`.
11+
* - Dedicated runtime route at `/api/copilotkit-multimodal` (see
12+
* ../../api/copilotkit-multimodal/route.ts). The vision-capable model
13+
* (gpt-4o) is scoped to just this demo, so other cells keep their
14+
* cheaper text-only models.
15+
* - Dedicated AG2 ConversableAgent at `src/agents/multimodal_agent.py`
16+
* under the slug `multimodal-demo`. Images are forwarded to the model
17+
* natively; PDFs are flattened to text on the Python side.
18+
* - Sample files live at `/demo-files/sample.png` and `/demo-files/sample.pdf`
19+
* (see `public/demo-files/`). The sample-buttons component fetches them
20+
* client-side, wraps the blob in a File, and drives the same hidden
21+
* `<input type="file">` the paperclip path uses (DataTransfer + dispatch
22+
* `change`). This keeps the sample and real-upload paths on a single
23+
* code path — whatever works for one works for both.
24+
*
25+
* Content flattening:
26+
* - AG2's AGUIStream validates message content as a plain string — it
27+
* does not accept arrays of content parts. A `ContentFlattenerShim`
28+
* extracts the text from multipart user messages before the AG-UI run
29+
* dispatches them to the AG2 backend.
1430
*/
1531

16-
import { useCallback } from "react";
17-
import { CopilotKit, CopilotChat } from "@copilotkit/react-core/v2";
32+
import { useCallback, useEffect, useMemo } from "react";
33+
import { CopilotKit, CopilotChat, useAgent } from "@copilotkit/react-core/v2";
1834
import type { AttachmentUploadResult } from "@copilotkit/shared";
1935

2036
import { SampleAttachmentButtons } from "./sample-attachment-buttons";
2137

38+
/**
39+
* Minimal structural shape of an AG-UI message for the converter shim.
40+
*/
41+
type AgentMessage = {
42+
id?: string;
43+
role: string;
44+
content?: unknown;
45+
};
46+
47+
/**
48+
* `onUpload` must resolve to an `AttachmentUploadResult` (data or url). We
49+
* always return the `data` variant — the demo inlines base64 instead of
50+
* uploading to external storage.
51+
*/
2252
type DataUploadResult = Extract<AttachmentUploadResult, { type: "data" }>;
2353

2454
const MAX_FILE_SIZE_BYTES = 10 * 1024 * 1024; // 10 MB
2555
const ACCEPT_MIME = "image/*,application/pdf";
56+
/**
57+
* Selector used by <SampleAttachmentButtons /> to locate CopilotChat's
58+
* hidden file input. Kept as a constant so the wrapper element and the
59+
* sample buttons cannot drift.
60+
*/
2661
const CHAT_ROOT_SELECTOR = "[data-multimodal-demo-chat-root]";
2762

63+
/**
64+
* Convert a File into the `AttachmentsConfig.onUpload` result shape —
65+
* inline base64 with the browser-provided mime type. We do this in the
66+
* browser rather than uploading to external storage because the demo is
67+
* self-contained; `maxSize: 10 MB` (set below) caps bloat.
68+
*
69+
* `FileReader` produces a `data:<mime>;base64,<payload>` URL; we strip the
70+
* prefix so the runtime forwards the raw base64 value (what the agent
71+
* expects in `source.value`).
72+
*/
2873
function fileToDataAttachment(file: File): Promise<DataUploadResult> {
2974
return new Promise((resolve, reject) => {
3075
const reader = new FileReader();
@@ -36,6 +81,7 @@ function fileToDataAttachment(file: File): Promise<DataUploadResult> {
3681
reject(new Error(`Unexpected FileReader result type for ${file.name}`));
3782
return;
3883
}
84+
// result looks like "data:image/png;base64,iVBORw0K..." — strip the prefix.
3985
const commaIdx = result.indexOf(",");
4086
const base64 = commaIdx >= 0 ? result.slice(commaIdx + 1) : result;
4187
resolve({
@@ -52,11 +98,96 @@ function fileToDataAttachment(file: File): Promise<DataUploadResult> {
5298
});
5399
}
54100

101+
/**
 * Collapse AG-UI message content into the plain string AG2 requires.
 *
 * AG2's AGUIStream validates message content as a plain string — it does
 * NOT accept arrays of content parts. When CopilotChat sends multipart
 * content (text + image/document), we keep only the text parts and join
 * them with newlines. Non-text parts (image, document, etc.) are dropped
 * without leaving a marker, so the flattened string contains exactly the
 * text the user typed (which is also what aimock's `userMessage` matcher
 * compares against).
 *
 * @param content - Message content in any shape: a string, an array of
 *   content parts, a single content part object, a primitive, or nullish.
 * @returns The string unchanged; the newline-joined text parts of an array;
 *   the text of a lone `{ type: "text" }` part; `""` for nullish values and
 *   for objects that carry no text; `String(value)` for other primitives.
 */
function flattenContent(content: unknown): string {
  if (typeof content === "string") return content;
  if (content == null) return "";

  if (Array.isArray(content)) {
    const pieces: string[] = [];
    for (const part of content) {
      const text = extractPartText(part);
      if (text !== null) pieces.push(text);
    }
    return pieces.join("\n");
  }

  // A lone content-part object: never fall through to String(), which would
  // send the literal "[object Object]" to the agent as the user's message.
  if (typeof content === "object") return extractPartText(content) ?? "";

  // Remaining primitives (number, boolean, bigint, ...) stringify sensibly.
  return String(content);
}

/**
 * Return the text of a `{ type: "text", text: string }` content part, or
 * `null` for anything else (non-objects, non-text parts, malformed parts).
 */
function extractPartText(part: unknown): string | null {
  if (!part || typeof part !== "object") return null;
  const candidate = part as { type?: unknown; text?: unknown };
  return candidate.type === "text" && typeof candidate.text === "string"
    ? candidate.text
    : null;
}
126+
127+
/**
128+
* Walk all user messages and flatten multipart content to plain strings.
129+
*/
130+
function flattenMessagesForAG2(
131+
messages: ReadonlyArray<Readonly<AgentMessage>>,
132+
): AgentMessage[] | null {
133+
let mutated = false;
134+
const next = messages.map((msg) => {
135+
if (msg.role !== "user") return msg as AgentMessage;
136+
const content = msg.content;
137+
if (!Array.isArray(content)) return msg as AgentMessage;
138+
mutated = true;
139+
return {
140+
...(msg as object),
141+
content: flattenContent(content),
142+
} as AgentMessage;
143+
});
144+
return mutated ? next : null;
145+
}
146+
147+
/**
 * Render-less component that subscribes to the `multimodal-demo` agent and,
 * from the `onRunInitialized` hook, swaps in flattened messages whenever a
 * user message carries multipart (array) content. Renders nothing.
 */
function ContentFlattenerShim() {
  const { agent } = useAgent({ agentId: "multimodal-demo" });

  // Empty dependency list: the subscriber object keeps a stable identity, so
  // the effect below only re-subscribes when `agent` itself changes.
  const subscriber = useMemo(() => {
    function onRunInitialized(params: {
      messages: ReadonlyArray<Readonly<AgentMessage>>;
    }) {
      const flattened = flattenMessagesForAG2(params.messages);
      // `undefined` leaves the run's messages untouched.
      return flattened ? { messages: flattened } : undefined;
    }
    return { onRunInitialized };
  }, []);

  useEffect(() => {
    if (!agent) return;
    // The local structural subscriber type does not line up with the
    // library's declared parameter type, hence the cast.
    const subscription = agent.subscribe(
      subscriber as unknown as Parameters<typeof agent.subscribe>[0],
    );
    return () => subscription.unsubscribe();
  }, [agent, subscriber]);

  return null;
}
55180
export default function MultimodalDemoPage() {
181+
// `onUpload` is passed into CopilotChat's `AttachmentsConfig`. Both the
182+
// paperclip button and the sample-injection path route files through
183+
// this same function (sample buttons drive CopilotChat's hidden file
184+
// input, which calls this internally via `useAttachments`). No
185+
// duplicated upload code lives in the sample-button component.
56186
const onUpload = useCallback(fileToDataAttachment, []);
57187

58188
return (
59189
<CopilotKit runtimeUrl="/api/copilotkit-multimodal" agent="multimodal-demo">
190+
<ContentFlattenerShim />
60191
<div
61192
data-testid="multimodal-demo-root"
62193
className="mx-auto flex h-screen max-w-4xl flex-col gap-3 p-4 sm:p-6"
@@ -85,6 +216,8 @@ export default function MultimodalDemoPage() {
85216
maxSize: MAX_FILE_SIZE_BYTES,
86217
onUpload,
87218
onUploadFailed: (err) => {
219+
// Log without disrupting the default UI — CopilotChat already
220+
// shows a toast-style indicator on validation failure.
88221
console.warn("[multimodal-demo] attachment rejected", err);
89222
},
90223
}}

showcase/integrations/ag2/src/app/demos/multimodal/sample-attachment-buttons.tsx

Lines changed: 57 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,20 @@
22

33
/**
44
* Two buttons that inject bundled sample files into the active CopilotChat's
5-
* attachment queue. The queue is owned internally by <CopilotChat /> via
6-
* `useAttachments`; we drive it through the DOM by populating the hidden file
7-
* input's `.files` and dispatching a `change` event.
5+
* attachment queue. The queue is owned internally by <CopilotChat /> (via its
6+
* `useAttachments` hook), so we inject at the DOM level: find the hidden
7+
* `<input type="file">` CopilotChat renders, populate its `.files` via
8+
* DataTransfer, and dispatch a `change` event. This exercises the *same*
9+
* onChange handler the paperclip / drag-and-drop paths use — which means our
10+
* sample path runs through the `AttachmentsConfig.onUpload` the page wires
11+
* on the chat, the same file-size + accept-filter validation, the same
12+
* placeholder-then-ready lifecycle. No duplicated queueing code.
13+
*
14+
* Container scope: the sample buttons live next to the chat inside a
15+
* `data-multimodal-demo-root` wrapper (see page.tsx). We scope our
16+
* `querySelector` to that root so multiple CopilotChat instances on the
17+
* page (there aren't any today, but the pattern should be safe) don't
18+
* collide.
819
*/
920

1021
import { useCallback, useState } from "react";
@@ -35,19 +46,37 @@ const SAMPLES: readonly SampleSpec[] = [
3546
];
3647

3748
export interface SampleAttachmentButtonsProps {
49+
/**
50+
* Selector (scoped to `document`) that resolves to the wrapper element
51+
* rendered around `<CopilotChat />`. The component walks this element's
52+
* subtree to find the hidden file input CopilotChat renders.
53+
*/
3854
readonly rootSelector: string;
3955
}
4056

4157
/**
 * Locate the file input that sits beneath the element matched by
 * `rootSelector`.
 *
 * @param rootSelector - CSS selector, resolved against `document`, for the
 *   wrapper element to search within.
 * @returns The first `<input type="file">` in the wrapper's subtree, or
 *   `null` when there is no DOM (e.g. during server rendering), the wrapper
 *   is not found, or it contains no file input.
 */
function findChatFileInput(rootSelector: string): HTMLInputElement | null {
  // No `document` outside the browser.
  if (typeof document === "undefined") return null;

  const container = document.querySelector(rootSelector);

  // CopilotChat is expected to render a single hidden file input; matching on
  // `type="file"` keeps any other inputs inside the wrapper out of the result.
  return container
    ? container.querySelector<HTMLInputElement>('input[type="file"]')
    : null;
}
4767

68+
/**
69+
* Magic-byte prefixes used to validate fetched sample files. We check
70+
* these because Next.js will happily serve a Git LFS *pointer* file (a
71+
* short plain-text stub starting with `version https://git-lfs...`) with
72+
* a `Content-Type: image/png` header if LFS wasn't pulled at build time.
73+
* Without this guard, the broken pointer bytes get base64-encoded,
74+
* fed into CopilotChat as a valid-looking PNG, and rendered as a broken
75+
* <img>. Fail loudly with an actionable error instead.
76+
*/
4877
const MAGIC_BYTES: Record<string, number[]> = {
49-
"image/png": [0x89, 0x50, 0x4e, 0x47],
50-
"application/pdf": [0x25, 0x50, 0x44, 0x46],
78+
"image/png": [0x89, 0x50, 0x4e, 0x47], // ‰PNG
79+
"application/pdf": [0x25, 0x50, 0x44, 0x46], // %PDF
5180
};
5281

5382
const LFS_POINTER_PREFIX = "version https://git-lfs";
@@ -64,28 +93,36 @@ async function fetchAsFile(spec: SampleSpec): Promise<File> {
6493
const res = await fetch(spec.fetchUrl);
6594
if (!res.ok) {
6695
throw new Error(
67-
`Could not fetch sample "${spec.filename}" — HTTP ${res.status}.`,
96+
`Could not fetch sample "${spec.filename}" — HTTP ${res.status}. ` +
97+
`Is the file bundled under public${spec.fetchUrl}?`,
6898
);
6999
}
70100
const buffer = await res.arrayBuffer();
71101
const bytes = new Uint8Array(buffer);
72102

103+
// Detect Git LFS pointer stub — the file on disk hasn't been materialized.
73104
const asciiHead = new TextDecoder("utf-8", { fatal: false }).decode(
74105
bytes.slice(0, Math.min(bytes.length, 64)),
75106
);
76107
if (asciiHead.startsWith(LFS_POINTER_PREFIX)) {
77108
throw new Error(
78-
`Sample "${spec.filename}" is a Git LFS pointer, not the real asset.`,
109+
`Sample "${spec.filename}" is a Git LFS pointer, not the real asset. ` +
110+
"The deploy environment needs to run `git lfs pull` (or set " +
111+
"`GIT_LFS_ENABLED=1`) so the binary is checked out before the Next.js " +
112+
"app serves it.",
79113
);
80114
}
81115

82116
const expectedMagic = MAGIC_BYTES[spec.mimeType];
83117
if (expectedMagic && !bytesStartWith(bytes, expectedMagic)) {
84118
throw new Error(
85-
`Sample "${spec.filename}" does not have a valid ${spec.mimeType} signature.`,
119+
`Sample "${spec.filename}" does not have a valid ${spec.mimeType} ` +
120+
"signature. The file may be corrupted or a wrong asset was committed.",
86121
);
87122
}
88123

124+
// Re-wrap the bytes into a blob/File with the explicit MIME type rather than
125+
// trusting whatever Content-Type the dev server returned.
89126
const blob = new Blob([buffer], { type: spec.mimeType });
90127
return new File([blob], spec.filename, { type: spec.mimeType });
91128
}
@@ -104,13 +141,23 @@ export function SampleAttachmentButtons({
104141
const fileInput = findChatFileInput(rootSelector);
105142
if (!fileInput) {
106143
throw new Error(
107-
`CopilotChat file input not found under "${rootSelector}".`,
144+
`CopilotChat file input not found under "${rootSelector}". ` +
145+
"Is <CopilotChat /> mounted with `attachments.enabled: true`?",
108146
);
109147
}
110148
const file = await fetchAsFile(spec);
149+
150+
// Populate the file input's `.files` list via a fresh DataTransfer.
151+
// This is the only way to programmatically set `HTMLInputElement.files`
152+
// — assigning a plain array fails in every browser.
111153
const dt = new DataTransfer();
112154
dt.items.add(file);
113155
fileInput.files = dt.files;
156+
157+
// Dispatch a bubbling `change` event. CopilotChat's internal
158+
// `useAttachments.handleFileUpload` listens on `onChange`, which
159+
// React wires up as a native `change` listener — so a standard
160+
// DOM Event with `bubbles: true` reaches it.
114161
fileInput.dispatchEvent(new Event("change", { bubbles: true }));
115162
} catch (err) {
116163
console.error(
@@ -146,7 +193,7 @@ export function SampleAttachmentButtons({
146193
onClick={() => void injectSample(spec)}
147194
className="rounded border border-black/15 bg-white px-3 py-1 text-xs font-medium text-black transition hover:bg-black/5 disabled:cursor-not-allowed disabled:opacity-50 dark:border-white/15 dark:bg-neutral-900 dark:text-white dark:hover:bg-white/5"
148195
>
149-
{isLoading ? "Loading..." : spec.buttonLabel}
196+
{isLoading ? "Loading" : spec.buttonLabel}
150197
</button>
151198
);
152199
})}

0 commit comments

Comments
 (0)