88 * same pipeline the paperclip button uses.
99 *
1010 * Architecture:
11- * - Dedicated runtime route at `/api/copilotkit-multimodal`. The
12- * vision-capable model (gpt-4o) lives in src/agents/multimodal_agent.py.
13- * - Sample files live under `/public/demo-files/`.
11+ * - Dedicated runtime route at `/api/copilotkit-multimodal` (see
12+ * ../api/copilotkit-multimodal/route.ts). The vision-capable model
13+ * (gpt-4o) is scoped to just this demo, so other cells keep their
14+ * cheaper text-only models.
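 * - Dedicated AG2 ConversableAgent at `src/agents/multimodal_agent.py`
 *   under the slug `multimodal-demo`. The agent is intended to receive
 *   images natively and PDFs flattened to text on the Python side. Note,
 *   however, that the client-side content flattening described below
 *   strips non-text parts before the run is dispatched, so with the shim
 *   in place only the text of a user message reaches the agent.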
18+ * - Sample files live at `/demo-files/sample.png` and `/demo-files/sample.pdf`
19+ * (see `public/demo-files/`). The sample-buttons component fetches them
20+ * client-side, wraps the blob in a File, and drives the same hidden
21+ * `<input type="file">` the paperclip path uses (DataTransfer + dispatch
22+ * `change`). This keeps the sample and real-upload paths on a single
23+ * code path — whatever works for one works for both.
24+ *
25+ * Content flattening:
26+ * - AG2's AGUIStream validates message content as a plain string — it
27+ * does not accept arrays of content parts. A `ContentFlattenerShim`
28+ * extracts the text from multipart user messages before the AG-UI run
29+ * dispatches them to the AG2 backend.
1430 */
1531
16- import { useCallback } from "react" ;
17- import { CopilotKit , CopilotChat } from "@copilotkit/react-core/v2" ;
32+ import { useCallback , useEffect , useMemo } from "react" ;
33+ import { CopilotKit , CopilotChat , useAgent } from "@copilotkit/react-core/v2" ;
1834import type { AttachmentUploadResult } from "@copilotkit/shared" ;
1935
2036import { SampleAttachmentButtons } from "./sample-attachment-buttons" ;
2137
/**
 * Loose structural view of an AG-UI message, declaring only the fields the
 * content-flattening shim reads or rewrites. `content` is `unknown` because
 * it may be a plain string or an array of content parts and is narrowed at
 * the point of use.
 */
type AgentMessage = {
  role: string;
  id?: string;
  content?: unknown;
};
46+
/**
 * The inline-data member of `AttachmentUploadResult`.
 *
 * `onUpload` may resolve to either the `data` or the `url` variant; this
 * demo only ever produces `data`, inlining the file as base64 rather than
 * uploading it to external storage.
 */
type DataUploadResult = Extract<AttachmentUploadResult, { type: "data" }>;
2353
/** Largest attachment accepted by the demo, in bytes (10 MB). */
const MAX_FILE_SIZE_BYTES = 10 * 1024 * 1024;

/** Comma-separated MIME patterns the demo accepts: any image, plus PDF. */
const ACCEPT_MIME = "image/*,application/pdf";

/**
 * Attribute selector that <SampleAttachmentButtons /> uses to find
 * CopilotChat's hidden file input. Defined once so the wrapper element and
 * the sample buttons always agree on it.
 */
const CHAT_ROOT_SELECTOR = "[data-multimodal-demo-chat-root]";
2762
63+ /**
64+ * Convert a File into the `AttachmentsConfig.onUpload` result shape —
65+ * inline base64 with the browser-provided mime type. We do this in the
66+ * browser rather than uploading to external storage because the demo is
67+ * self-contained; `maxSize: 10 MB` (set below) caps bloat.
68+ *
69+ * `FileReader` produces a `data:<mime>;base64,<payload>` URL; we strip the
70+ * prefix so the runtime forwards the raw base64 value (what the agent
71+ * expects in `source.value`).
72+ */
2873function fileToDataAttachment ( file : File ) : Promise < DataUploadResult > {
2974 return new Promise ( ( resolve , reject ) => {
3075 const reader = new FileReader ( ) ;
@@ -36,6 +81,7 @@ function fileToDataAttachment(file: File): Promise<DataUploadResult> {
3681 reject ( new Error ( `Unexpected FileReader result type for ${ file . name } ` ) ) ;
3782 return ;
3883 }
84+ // result looks like "data:image/png;base64,iVBORw0K..." — strip the prefix.
3985 const commaIdx = result . indexOf ( "," ) ;
4086 const base64 = commaIdx >= 0 ? result . slice ( commaIdx + 1 ) : result ;
4187 resolve ( {
@@ -52,11 +98,96 @@ function fileToDataAttachment(file: File): Promise<DataUploadResult> {
5298 } ) ;
5399}
54100
/**
 * Flatten AG-UI message content into the single plain string AG2 requires.
 *
 * AG2's AGUIStream validates message content as a plain string and does
 * NOT accept arrays of content parts. When CopilotChat sends multipart
 * content (text + image/document), only the `text` parts are kept and
 * joined with newlines. Non-text parts (image, document, etc.) are dropped
 * without leaving a placeholder, because AG2's ConversableAgent cannot
 * accept binary content parts; the agent still receives the text question.
 *
 * This keeps the text visible to the AG2 agent (and to aimock's
 * `userMessage` matcher) while preventing the 400 validation error.
 *
 * @param content - Message content: a string, an array of content parts,
 *   a single content part object, or any other value.
 * @returns The string itself for string input; the newline-joined text
 *   parts for part arrays or a lone part; `""` for null, undefined, and
 *   objects with no text part; `String(value)` for other primitives.
 */
function flattenContent(content: unknown): string {
  if (typeof content === "string") return content;
  if (content == null) return "";
  // Remaining primitives (number, boolean, bigint, ...) stringify sensibly.
  if (typeof content !== "object") return String(content);

  // A lone content part is handled like a one-element array. Stringifying
  // an object would otherwise send the literal "[object Object]" to AG2.
  const parts: readonly unknown[] = Array.isArray(content)
    ? content
    : [content];

  const pieces: string[] = [];
  for (const part of parts) {
    if (!part || typeof part !== "object") continue;
    const candidate = part as { type?: unknown; text?: unknown };
    if (candidate.type === "text" && typeof candidate.text === "string") {
      pieces.push(candidate.text);
    }
  }
  return pieces.join("\n");
}
126+
/**
 * Rewrite every user message whose content is an array of parts so that
 * its content becomes a plain string (via `flattenContent`).
 *
 * Messages from other roles, and user messages whose content is not an
 * array, are passed through by reference. The input is never mutated.
 *
 * @param messages - The messages about to be sent.
 * @returns A new array with flattened user messages, or `null` when no
 *   message needed rewriting.
 */
function flattenMessagesForAG2(
  messages: ReadonlyArray<Readonly<AgentMessage>>,
): AgentMessage[] | null {
  const result: AgentMessage[] = [];
  let changed = false;

  for (const message of messages) {
    const isMultipartUserMessage =
      message.role === "user" && Array.isArray(message.content);

    if (!isMultipartUserMessage) {
      result.push(message as AgentMessage);
      continue;
    }

    changed = true;
    result.push({
      ...(message as object),
      content: flattenContent(message.content),
    } as AgentMessage);
  }

  return changed ? result : null;
}
146+
/**
 * Renderless component that subscribes to the `multimodal-demo` agent and
 * flattens outgoing multipart user content to plain strings, so that the
 * AG-UI run hands AG2 string-only message content.
 *
 * The subscriber's `onRunInitialized` handler returns `{ messages }` with
 * the flattened list when at least one message was rewritten, and
 * `undefined` otherwise (presumably leaving the run's messages untouched;
 * confirm against the AG-UI subscriber contract).
 */
function ContentFlattenerShim() {
  const { agent } = useAgent({ agentId: "multimodal-demo" });

  // Empty dependency list: the handler closes over no component state, so
  // one subscriber object is reused for the component's whole lifetime.
  const subscriber = useMemo(() => {
    function onRunInitialized({
      messages,
    }: {
      messages: ReadonlyArray<Readonly<AgentMessage>>;
    }) {
      const rewritten = flattenMessagesForAG2(messages);
      return rewritten ? { messages: rewritten } : undefined;
    }
    return { onRunInitialized };
  }, []);

  useEffect(() => {
    if (!agent) return;
    // The local subscriber shape is structural; cast it to whatever
    // parameter type `agent.subscribe` declares.
    const subscription = agent.subscribe(
      subscriber as unknown as Parameters<typeof agent.subscribe>[0],
    );
    // Unsubscribe when the agent changes or the component unmounts.
    return () => subscription.unsubscribe();
  }, [agent, subscriber]);

  return null;
}
179+
55180export default function MultimodalDemoPage ( ) {
181+ // `onUpload` is passed into CopilotChat's `AttachmentsConfig`. Both the
182+ // paperclip button and the sample-injection path route files through
183+ // this same function (sample buttons drive CopilotChat's hidden file
184+ // input, which calls this internally via `useAttachments`). No
185+ // duplicated upload code lives in the sample-button component.
56186 const onUpload = useCallback ( fileToDataAttachment , [ ] ) ;
57187
58188 return (
59189 < CopilotKit runtimeUrl = "/api/copilotkit-multimodal" agent = "multimodal-demo" >
190+ < ContentFlattenerShim />
60191 < div
61192 data-testid = "multimodal-demo-root"
62193 className = "mx-auto flex h-screen max-w-4xl flex-col gap-3 p-4 sm:p-6"
@@ -85,6 +216,8 @@ export default function MultimodalDemoPage() {
85216 maxSize : MAX_FILE_SIZE_BYTES ,
86217 onUpload,
87218 onUploadFailed : ( err ) => {
219+ // Log without disrupting the default UI — CopilotChat already
220+ // shows a toast-style indicator on validation failure.
88221 console . warn ( "[multimodal-demo] attachment rejected" , err ) ;
89222 } ,
90223 } }
0 commit comments