From f41275459f71605388c51ebaeacb2e08b84abf7f Mon Sep 17 00:00:00 2001 From: Andy Adams-Moran Date: Tue, 31 Mar 2026 15:09:54 +0100 Subject: [PATCH 01/12] Pass structured tool results via RPC instead of stringifying In both the Node and Go SDKs, _executeToolAndRespond / executeToolAndRespond was converting structured ToolResultObject values into plain JSON strings before sending them over RPC. This destroyed the object shape, causing toolTelemetry (and other structured fields like resultType) to be silently lost on the server side. Node SDK: detect ToolResultObject by checking for textResultForLlm + resultType properties and pass it directly to handlePendingToolCall, which already accepts the union type (string | object) in its RPC schema. Go SDK: send a ResultUnion with ResultResult populated (preserving TextResultForLlm, ResultType, Error, and ToolTelemetry) instead of extracting only the text and sending ResultUnion with String. Fixes the SDK side of github/copilot-agent-runtime#5574. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- go/session.go | 16 ++++++++++++---- nodejs/src/session.ts | 21 ++++++++++++++++++++- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/go/session.go b/go/session.go index 5be626b52..62298775d 100644 --- a/go/session.go +++ b/go/session.go @@ -620,13 +620,21 @@ func (s *Session) executeToolAndRespond(requestID, toolName, toolCallID string, return } - resultStr := result.TextResultForLLM - if resultStr == "" { - resultStr = fmt.Sprintf("%v", result) + rpcResult := rpc.ResultUnion{ + ResultResult: &rpc.ResultResult{ + TextResultForLlm: result.TextResultForLLM, + ToolTelemetry: result.ToolTelemetry, + }, + } + if result.ResultType != "" { + rpcResult.ResultResult.ResultType = &result.ResultType + } + if result.Error != "" { + rpcResult.ResultResult.Error = &result.Error } s.RPC.Tools.HandlePendingToolCall(ctx, &rpc.SessionToolsHandlePendingToolCallParams{ RequestID: requestID, - Result: &rpc.ResultUnion{String: &resultStr}, + Result: &rpcResult, }) } diff --git a/nodejs/src/session.ts b/nodejs/src/session.ts index cb2cf826b..e6a51c0a3 100644 --- a/nodejs/src/session.ts +++ b/nodejs/src/session.ts @@ -33,6 +33,8 @@ import type { SessionUiApi, Tool, ToolHandler, + ToolResult, + ToolResultObject, TraceContextProvider, TypedSessionEventHandler, UserInputHandler, @@ -459,11 +461,13 @@ export class CopilotSession { traceparent, tracestate, }); - let result: string; + let result: ToolResult; if (rawResult == null) { result = ""; } else if (typeof rawResult === "string") { result = rawResult; + } else if (isToolResultObject(rawResult)) { + result = rawResult; } else { result = JSON.stringify(rawResult); } @@ -1043,3 +1047,18 @@ export class CopilotSession { await this.rpc.log({ message, ...options }); } } + +/** + * Type guard that checks whether a value is a {@link ToolResultObject}. + * A valid object must have a string `textResultForLlm` and a string `resultType`. + */ +function isToolResultObject(value: unknown): value is ToolResultObject { + return ( + typeof value === "object" && + value !== null && + "textResultForLlm" in value && + typeof (value as ToolResultObject).textResultForLlm === "string" && + "resultType" in value && + typeof (value as ToolResultObject).resultType === "string" + ); +} From d6f386182c91edc4fe535afdc80995e7f1bfddd8 Mon Sep 17 00:00:00 2001 From: Andy Adams-Moran Date: Wed, 1 Apr 2026 13:17:10 +0100 Subject: [PATCH 02/12] Address review feedback: restore Go fallback, add test, fix snapshot - Go: restore fallback to fmt.Sprintf when TextResultForLLM is empty, preserving prior behavior for handlers that don't set it. - Node: add e2e test verifying toolTelemetry is not leaked into LLM content and that the tool.execution_complete event fires. - Update failure-resulttype snapshot to expect just textResultForLlm content instead of the old stringified JSON. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- go/session.go | 7 ++- nodejs/test/e2e/tool_results.test.ts | 51 ++++++++++++++++++- ...e_tool_result_with_failure_resulttype.yaml | 2 +- ..._stringify_structured_results_for_llm.yaml | 20 ++++++++ 4 files changed, 76 insertions(+), 4 deletions(-) create mode 100644 test/snapshots/tool_results/should_preserve_tooltelemetry_and_not_stringify_structured_results_for_llm.yaml diff --git a/go/session.go b/go/session.go index 62298775d..49a4b1a63 100644 --- a/go/session.go +++ b/go/session.go @@ -620,9 +620,14 @@ func (s *Session) executeToolAndRespond(requestID, toolName, toolCallID string, return } + textResultForLLM := result.TextResultForLLM + if textResultForLLM == "" { + textResultForLLM = fmt.Sprintf("%v", result) + } + rpcResult := rpc.ResultUnion{ ResultResult: &rpc.ResultResult{ - TextResultForLlm: result.TextResultForLLM, + TextResultForLlm: textResultForLLM, ToolTelemetry: result.ToolTelemetry, }, } diff --git a/nodejs/test/e2e/tool_results.test.ts b/nodejs/test/e2e/tool_results.test.ts index 66e715490..da42e6a3d 100644 --- a/nodejs/test/e2e/tool_results.test.ts +++ b/nodejs/test/e2e/tool_results.test.ts @@ -4,12 +4,12 @@ import { describe, expect, it } from "vitest"; import { z } from "zod"; -import type { ToolResultObject } from "../../src/index.js"; +import type { SessionEvent, ToolResultObject } from "../../src/index.js"; import { approveAll, defineTool } from "../../src/index.js"; import { createSdkTestContext } from "./harness/sdkTestContext"; describe("Tool Results", async () => { - const { copilotClient: client } = await createSdkTestContext(); + const { copilotClient: client, openAiEndpoint } = await createSdkTestContext(); it("should handle structured ToolResultObject from custom tool", async () => { const session = await client.createSession({ @@ -98,4 +98,51 @@ describe("Tool Results", async () => { await session.disconnect(); }); + + it("should preserve toolTelemetry and not stringify structured results for LLM", async () => { + const session = await client.createSession({ + onPermissionRequest: approveAll, + tools: [ + defineTool("analyze_code", { + description: "Analyzes code for issues", + parameters: z.object({ + file: z.string(), + }), + handler: ({ file }): ToolResultObject => ({ + textResultForLlm: `Analysis of ${file}: no issues found`, + resultType: "success", + toolTelemetry: { + metrics: { analysisTimeMs: 150 }, + properties: { analyzer: "eslint" }, + }, + }), + }), + ], + }); + + const events: SessionEvent[] = []; + session.on((event) => events.push(event)); + + const assistantMessage = await session.sendAndWait({ + prompt: "Analyze the file main.ts for issues.", + }); + + expect(assistantMessage?.data.content).toMatch(/no issues/i); + + // Verify the LLM received just textResultForLlm, not stringified JSON + const traffic = await openAiEndpoint.getExchanges(); + const lastConversation = traffic[traffic.length - 1]!; + const toolResults = lastConversation.request.messages.filter( + (m: { role: string }) => m.role === "tool" + ); + expect(toolResults.length).toBe(1); + expect(toolResults[0]!.content).not.toContain("toolTelemetry"); + expect(toolResults[0]!.content).not.toContain("resultType"); + + // Verify tool.execution_complete event carries toolTelemetry + const toolCompletes = events.filter((e) => e.type === "tool.execution_complete"); + expect(toolCompletes.length).toBeGreaterThanOrEqual(1); + + await session.disconnect(); + }); }); diff --git a/test/snapshots/tool_results/should_handle_tool_result_with_failure_resulttype.yaml b/test/snapshots/tool_results/should_handle_tool_result_with_failure_resulttype.yaml index 7c5ac7301..3fddb1600 100644 --- a/test/snapshots/tool_results/should_handle_tool_result_with_failure_resulttype.yaml +++ b/test/snapshots/tool_results/should_handle_tool_result_with_failure_resulttype.yaml @@ -15,6 +15,6 @@ conversations: arguments: "{}" - role: tool tool_call_id: toolcall_0 - content: '{"error":"API timeout","resultType":"failure","textResultForLlm":"Service unavailable"}' + content: Service unavailable - role: assistant content: service is down diff --git a/test/snapshots/tool_results/should_preserve_tooltelemetry_and_not_stringify_structured_results_for_llm.yaml b/test/snapshots/tool_results/should_preserve_tooltelemetry_and_not_stringify_structured_results_for_llm.yaml new file mode 100644 index 000000000..0e0158701 --- /dev/null +++ b/test/snapshots/tool_results/should_preserve_tooltelemetry_and_not_stringify_structured_results_for_llm.yaml @@ -0,0 +1,20 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Analyze the file main.ts for issues. + - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: analyze_code + arguments: '{"file":"main.ts"}' + - role: tool + tool_call_id: toolcall_0 + content: "Analysis of main.ts: no issues found" + - role: assistant + content: "The analysis of main.ts is complete -- no issues were found." From 6677e8d6bbfda1f54ff8f4614b67bdbe29f35ba8 Mon Sep 17 00:00:00 2001 From: Andy Adams-Moran Date: Wed, 1 Apr 2026 15:27:42 +0100 Subject: [PATCH 03/12] Strengthen toolTelemetry test assertions Assert tool.execution_complete event has success=true and that toolTelemetry, when present, is non-empty (not the {} that results from the old stringification bug). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- nodejs/test/e2e/tool_results.test.ts | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/nodejs/test/e2e/tool_results.test.ts b/nodejs/test/e2e/tool_results.test.ts index da42e6a3d..3c1b20e2f 100644 --- a/nodejs/test/e2e/tool_results.test.ts +++ b/nodejs/test/e2e/tool_results.test.ts @@ -139,9 +139,16 @@ describe("Tool Results", async () => { expect(toolResults[0]!.content).not.toContain("toolTelemetry"); expect(toolResults[0]!.content).not.toContain("resultType"); - // Verify tool.execution_complete event carries toolTelemetry + // Verify tool.execution_complete event fires for this tool call const toolCompletes = events.filter((e) => e.type === "tool.execution_complete"); expect(toolCompletes.length).toBeGreaterThanOrEqual(1); + const completeEvent = toolCompletes[0]!; + expect(completeEvent.data.success).toBe(true); + // When the server preserves the structured result, toolTelemetry should + // be present and non-empty (not the {} that results from stringification). + if (completeEvent.data.toolTelemetry) { + expect(Object.keys(completeEvent.data.toolTelemetry).length).toBeGreaterThan(0); + } await session.disconnect(); }); From 100aeb77e77c0bd5f9679a359ab006729b786e58 Mon Sep 17 00:00:00 2001 From: Andy Adams-Moran Date: Wed, 1 Apr 2026 16:02:32 +0100 Subject: [PATCH 04/12] Add tool_results e2e tests for Python, Go, and .NET Mirror the Node e2e tests from tool_results.test.ts in all three remaining SDK languages, reusing the shared YAML snapshots. Each suite covers: - structured ToolResultObject with success resultType - failure resultType - toolTelemetry preservation (verifies LLM content has no stringified JSON and no toolTelemetry/resultType leakage) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- dotnet/test/ToolResultsTests.cs | 113 ++++++++++++++++++ go/internal/e2e/tool_results_test.go | 171 +++++++++++++++++++++++++++ python/e2e/test_tool_results.py | 91 ++++++++++++++ 3 files changed, 375 insertions(+) create mode 100644 dotnet/test/ToolResultsTests.cs create mode 100644 go/internal/e2e/tool_results_test.go create mode 100644 python/e2e/test_tool_results.py diff --git a/dotnet/test/ToolResultsTests.cs b/dotnet/test/ToolResultsTests.cs new file mode 100644 index 000000000..172428e6c --- /dev/null +++ b/dotnet/test/ToolResultsTests.cs @@ -0,0 +1,113 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + *--------------------------------------------------------------------------------------------*/ + +using GitHub.Copilot.SDK.Test.Harness; +using Microsoft.Extensions.AI; +using System.ComponentModel; +using Xunit; +using Xunit.Abstractions; + +namespace GitHub.Copilot.SDK.Test; + +public class ToolResultsTests(E2ETestFixture fixture, ITestOutputHelper output) : E2ETestBase(fixture, "tool_results", output) +{ + [Fact] + public async Task Should_Handle_Structured_ToolResultObject_From_Custom_Tool() + { + var session = await CreateSessionAsync(new SessionConfig + { + Tools = [AIFunctionFactory.Create(GetWeather, "get_weather")], + OnPermissionRequest = PermissionHandler.ApproveAll, + }); + + await session.SendAsync(new MessageOptions + { + Prompt = "What's the weather in Paris?" + }); + + var assistantMessage = await TestHelper.GetFinalAssistantMessageAsync(session); + Assert.NotNull(assistantMessage); + Assert.Matches("(?i)sunny|72", assistantMessage!.Data.Content ?? string.Empty); + + [Description("Gets weather for a city")] + static ToolResultAIContent GetWeather([Description("City name")] string city) + => new(new() + { + TextResultForLlm = $"The weather in {city} is sunny and 72°F", + ResultType = "success", + }); + } + + [Fact] + public async Task Should_Handle_Tool_Result_With_Failure_ResultType() + { + var session = await CreateSessionAsync(new SessionConfig + { + Tools = [AIFunctionFactory.Create(CheckStatus, "check_status")], + OnPermissionRequest = PermissionHandler.ApproveAll, + }); + + await session.SendAsync(new MessageOptions + { + Prompt = "Check the status of the service using check_status. If it fails, say 'service is down'." + }); + + var assistantMessage = await TestHelper.GetFinalAssistantMessageAsync(session); + Assert.NotNull(assistantMessage); + Assert.Contains("service is down", assistantMessage!.Data.Content?.ToLowerInvariant() ?? string.Empty); + + [Description("Checks the status of a service")] + static ToolResultAIContent CheckStatus() + => new(new() + { + TextResultForLlm = "Service unavailable", + ResultType = "failure", + Error = "API timeout", + }); + } + + [Fact] + public async Task Should_Preserve_ToolTelemetry_And_Not_Stringify_Structured_Results_For_LLM() + { + var session = await CreateSessionAsync(new SessionConfig + { + Tools = [AIFunctionFactory.Create(AnalyzeCode, "analyze_code")], + OnPermissionRequest = PermissionHandler.ApproveAll, + }); + + await session.SendAsync(new MessageOptions + { + Prompt = "Analyze the file main.ts for issues." + }); + + var assistantMessage = await TestHelper.GetFinalAssistantMessageAsync(session); + Assert.NotNull(assistantMessage); + Assert.Contains("no issues", assistantMessage!.Data.Content?.ToLowerInvariant() ?? string.Empty); + + // Verify the LLM received just textResultForLlm, not stringified JSON + var traffic = await Ctx.GetExchangesAsync(); + var lastConversation = traffic[^1]; + + var toolResults = lastConversation.Request.Messages + .Where(m => m.Role == "tool") + .ToList(); + + Assert.Single(toolResults); + Assert.DoesNotContain("toolTelemetry", toolResults[0].Content); + Assert.DoesNotContain("resultType", toolResults[0].Content); + + [Description("Analyzes code for issues")] + static ToolResultAIContent AnalyzeCode([Description("File to analyze")] string file) + => new(new() + { + TextResultForLlm = $"Analysis of {file}: no issues found", + ResultType = "success", + ToolTelemetry = new Dictionary + { + ["metrics"] = new Dictionary { ["analysisTimeMs"] = 150 }, + ["properties"] = new Dictionary { ["analyzer"] = "eslint" }, + }, + }); + } +} diff --git a/go/internal/e2e/tool_results_test.go b/go/internal/e2e/tool_results_test.go new file mode 100644 index 000000000..cc21e30b6 --- /dev/null +++ b/go/internal/e2e/tool_results_test.go @@ -0,0 +1,171 @@ +package e2e + +import ( + "strings" + "testing" + + copilot "github.com/github/copilot-sdk/go" + "github.com/github/copilot-sdk/go/internal/e2e/testharness" +) + +func TestToolResults(t *testing.T) { + ctx := testharness.NewTestContext(t) + client := ctx.NewClient() + t.Cleanup(func() { client.ForceStop() }) + + t.Run("should handle structured toolresultobject from custom tool", func(t *testing.T) { + ctx.ConfigureForTest(t) + + type WeatherParams struct { + City string `json:"city" jsonschema:"City name"` + } + + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + Tools: []copilot.Tool{ + copilot.DefineTool("get_weather", "Gets weather for a city", + func(params WeatherParams, inv copilot.ToolInvocation) (copilot.ToolResult, error) { + return copilot.ToolResult{ + TextResultForLLM: "The weather in " + params.City + " is sunny and 72°F", + ResultType: "success", + }, nil + }), + }, + }) + if err != nil { + t.Fatalf("Failed to create session: %v", err) + } + + _, err = session.Send(t.Context(), copilot.MessageOptions{Prompt: "What's the weather in Paris?"}) + if err != nil { + t.Fatalf("Failed to send message: %v", err) + } + + answer, err := testharness.GetFinalAssistantMessage(t.Context(), session) + if err != nil { + t.Fatalf("Failed to get assistant message: %v", err) + } + + content := "" + if answer.Data.Content != nil { + content = *answer.Data.Content + } + if !strings.Contains(strings.ToLower(content), "sunny") && !strings.Contains(content, "72") { + t.Errorf("Expected answer to mention sunny or 72, got %q", content) + } + }) + + t.Run("should handle tool result with failure resulttype", func(t *testing.T) { + ctx.ConfigureForTest(t) + + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + Tools: []copilot.Tool{ + { + Name: "check_status", + Description: "Checks the status of a service", + Handler: func(inv copilot.ToolInvocation) (copilot.ToolResult, error) { + return copilot.ToolResult{ + TextResultForLLM: "Service unavailable", + ResultType: "failure", + Error: "API timeout", + }, nil + }, + }, + }, + }) + if err != nil { + t.Fatalf("Failed to create session: %v", err) + } + + _, err = session.Send(t.Context(), copilot.MessageOptions{ + Prompt: "Check the status of the service using check_status. If it fails, say 'service is down'.", + }) + if err != nil { + t.Fatalf("Failed to send message: %v", err) + } + + answer, err := testharness.GetFinalAssistantMessage(t.Context(), session) + if err != nil { + t.Fatalf("Failed to get assistant message: %v", err) + } + + content := "" + if answer.Data.Content != nil { + content = *answer.Data.Content + } + if !strings.Contains(strings.ToLower(content), "service is down") { + t.Errorf("Expected 'service is down', got %q", content) + } + }) + + t.Run("should preserve tooltelemetry and not stringify structured results for llm", func(t *testing.T) { + ctx.ConfigureForTest(t) + + type AnalyzeParams struct { + File string `json:"file" jsonschema:"File to analyze"` + } + + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + Tools: []copilot.Tool{ + copilot.DefineTool("analyze_code", "Analyzes code for issues", + func(params AnalyzeParams, inv copilot.ToolInvocation) (copilot.ToolResult, error) { + return copilot.ToolResult{ + TextResultForLLM: "Analysis of " + params.File + ": no issues found", + ResultType: "success", + ToolTelemetry: map[string]any{ + "metrics": map[string]any{"analysisTimeMs": 150}, + "properties": map[string]any{"analyzer": "eslint"}, + }, + }, nil + }), + }, + }) + if err != nil { + t.Fatalf("Failed to create session: %v", err) + } + + _, err = session.Send(t.Context(), copilot.MessageOptions{Prompt: "Analyze the file main.ts for issues."}) + if err != nil { + t.Fatalf("Failed to send message: %v", err) + } + + answer, err := testharness.GetFinalAssistantMessage(t.Context(), session) + if err != nil { + t.Fatalf("Failed to get assistant message: %v", err) + } + + content := "" + if answer.Data.Content != nil { + content = *answer.Data.Content + } + if !strings.Contains(strings.ToLower(content), "no issues") { + t.Errorf("Expected 'no issues', got %q", content) + } + + // Verify the LLM received just textResultForLlm, not stringified JSON + traffic, err := ctx.GetExchanges() + if err != nil { + t.Fatalf("Failed to get exchanges: %v", err) + } + + lastConversation := traffic[len(traffic)-1] + var toolResults []testharness.ChatCompletionMessage + for _, msg := range lastConversation.Request.Messages { + if msg.Role == "tool" { + toolResults = append(toolResults, msg) + } + } + + if len(toolResults) != 1 { + t.Fatalf("Expected 1 tool result, got %d", len(toolResults)) + } + if strings.Contains(toolResults[0].Content, "toolTelemetry") { + t.Error("Tool result content should not contain 'toolTelemetry'") + } + if strings.Contains(toolResults[0].Content, "resultType") { + t.Error("Tool result content should not contain 'resultType'") + } + }) +} diff --git a/python/e2e/test_tool_results.py b/python/e2e/test_tool_results.py new file mode 100644 index 000000000..e1c21f2ae --- /dev/null +++ b/python/e2e/test_tool_results.py @@ -0,0 +1,91 @@ +"""E2E Tool Results Tests""" + +import pytest +from pydantic import BaseModel, Field + +from copilot import define_tool +from copilot.session import PermissionHandler +from copilot.tools import ToolInvocation, ToolResult + +from .testharness import E2ETestContext, get_final_assistant_message + +pytestmark = pytest.mark.asyncio(loop_scope="module") + + +class TestToolResults: + async def test_should_handle_structured_toolresultobject_from_custom_tool( + self, ctx: E2ETestContext + ): + class WeatherParams(BaseModel): + city: str = Field(description="City name") + + @define_tool("get_weather", description="Gets weather for a city") + def get_weather(params: WeatherParams, invocation: ToolInvocation) -> ToolResult: + return ToolResult( + text_result_for_llm=f"The weather in {params.city} is sunny and 72°F", + result_type="success", + ) + + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, tools=[get_weather] + ) + + await session.send("What's the weather in Paris?") + assistant_message = await get_final_assistant_message(session) + assert "sunny" in assistant_message.data.content.lower() or "72" in assistant_message.data.content + + async def test_should_handle_tool_result_with_failure_resulttype( + self, ctx: E2ETestContext + ): + @define_tool("check_status", description="Checks the status of a service") + def check_status(invocation: ToolInvocation) -> ToolResult: + return ToolResult( + text_result_for_llm="Service unavailable", + result_type="failure", + error="API timeout", + ) + + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, tools=[check_status] + ) + + answer = await session.send_and_wait( + "Check the status of the service using check_status. If it fails, say 'service is down'." + ) + assert answer is not None + assert "service is down" in answer.data.content.lower() + + async def test_should_preserve_tooltelemetry_and_not_stringify_structured_results_for_llm( + self, ctx: E2ETestContext + ): + class AnalyzeParams(BaseModel): + file: str = Field(description="File to analyze") + + @define_tool("analyze_code", description="Analyzes code for issues") + def analyze_code(params: AnalyzeParams, invocation: ToolInvocation) -> ToolResult: + return ToolResult( + text_result_for_llm=f"Analysis of {params.file}: no issues found", + result_type="success", + tool_telemetry={ + "metrics": {"analysisTimeMs": 150}, + "properties": {"analyzer": "eslint"}, + }, + ) + + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, tools=[analyze_code] + ) + + await session.send("Analyze the file main.ts for issues.") + assistant_message = await get_final_assistant_message(session) + assert "no issues" in assistant_message.data.content.lower() + + # Verify the LLM received just textResultForLlm, not stringified JSON + traffic = await ctx.get_exchanges() + last_conversation = traffic[-1] + tool_results = [ + m for m in last_conversation["request"]["messages"] if m["role"] == "tool" + ] + assert len(tool_results) == 1 + assert "toolTelemetry" not in tool_results[0]["content"] + assert "resultType" not in tool_results[0]["content"] From 13d2f5ffddef4e298b1b496505f5fc640224de7c Mon Sep 17 00:00:00 2001 From: Andy Adams-Moran Date: Wed, 1 Apr 2026 16:10:11 +0100 Subject: [PATCH 05/12] Fix Python ruff formatting and .NET JSON serialization - Python: run ruff format on test_tool_results.py - .NET: add JsonSerializerContext with ToolResultAIContent and pass serializerOptions to AIFunctionFactory.Create, matching the pattern used in ToolsTests for complex types. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- dotnet/test/ToolResultsTests.cs | 14 +++++++++++--- python/e2e/test_tool_results.py | 13 ++++++------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/dotnet/test/ToolResultsTests.cs b/dotnet/test/ToolResultsTests.cs index 172428e6c..5459f6527 100644 --- a/dotnet/test/ToolResultsTests.cs +++ b/dotnet/test/ToolResultsTests.cs @@ -5,6 +5,8 @@ using GitHub.Copilot.SDK.Test.Harness; using Microsoft.Extensions.AI; using System.ComponentModel; +using System.Text.Json; +using System.Text.Json.Serialization; using Xunit; using Xunit.Abstractions; @@ -12,12 +14,18 @@ namespace GitHub.Copilot.SDK.Test; public class ToolResultsTests(E2ETestFixture fixture, ITestOutputHelper output) : E2ETestBase(fixture, "tool_results", output) { + [JsonSourceGenerationOptions(JsonSerializerDefaults.Web)] + [JsonSerializable(typeof(ToolResultAIContent))] + [JsonSerializable(typeof(ToolResultObject))] + [JsonSerializable(typeof(JsonElement))] + private partial class ToolResultsJsonContext : JsonSerializerContext; + [Fact] public async Task Should_Handle_Structured_ToolResultObject_From_Custom_Tool() { var session = await CreateSessionAsync(new SessionConfig { - Tools = [AIFunctionFactory.Create(GetWeather, "get_weather")], + Tools = [AIFunctionFactory.Create(GetWeather, "get_weather", serializerOptions: ToolResultsJsonContext.Default.Options)], OnPermissionRequest = PermissionHandler.ApproveAll, }); @@ -44,7 +52,7 @@ public async Task Should_Handle_Tool_Result_With_Failure_ResultType() { var session = await CreateSessionAsync(new SessionConfig { - Tools = [AIFunctionFactory.Create(CheckStatus, "check_status")], + Tools = [AIFunctionFactory.Create(CheckStatus, "check_status", serializerOptions: ToolResultsJsonContext.Default.Options)], OnPermissionRequest = PermissionHandler.ApproveAll, }); @@ -72,7 +80,7 @@ public async Task Should_Preserve_ToolTelemetry_And_Not_Stringify_Structured_Res { var session = await CreateSessionAsync(new SessionConfig { - Tools = [AIFunctionFactory.Create(AnalyzeCode, "analyze_code")], + Tools = [AIFunctionFactory.Create(AnalyzeCode, "analyze_code", serializerOptions: ToolResultsJsonContext.Default.Options)], OnPermissionRequest = PermissionHandler.ApproveAll, }); diff --git a/python/e2e/test_tool_results.py b/python/e2e/test_tool_results.py index e1c21f2ae..564172f0e 100644 --- a/python/e2e/test_tool_results.py +++ b/python/e2e/test_tool_results.py @@ -32,11 +32,12 @@ def get_weather(params: WeatherParams, invocation: ToolInvocation) -> ToolResult await session.send("What's the weather in Paris?") assistant_message = await get_final_assistant_message(session) - assert "sunny" in assistant_message.data.content.lower() or "72" in assistant_message.data.content + assert ( + "sunny" in assistant_message.data.content.lower() + or "72" in assistant_message.data.content + ) - async def test_should_handle_tool_result_with_failure_resulttype( - self, ctx: E2ETestContext - ): + async def test_should_handle_tool_result_with_failure_resulttype(self, ctx: E2ETestContext): @define_tool("check_status", description="Checks the status of a service") def check_status(invocation: ToolInvocation) -> ToolResult: return ToolResult( @@ -83,9 +84,7 @@ def analyze_code(params: AnalyzeParams, invocation: ToolInvocation) -> ToolResul # Verify the LLM received just textResultForLlm, not stringified JSON traffic = await ctx.get_exchanges() last_conversation = traffic[-1] - tool_results = [ - m for m in last_conversation["request"]["messages"] if m["role"] == "tool" - ] + tool_results = [m for m in last_conversation["request"]["messages"] if m["role"] == "tool"] assert len(tool_results) == 1 assert "toolTelemetry" not in tool_results[0]["content"] assert "resultType" not in tool_results[0]["content"] From 0b1d5a0140e795a9e41d38c195034500a07df4b9 Mon Sep 17 00:00:00 2001 From: Andy Adams-Moran Date: Wed, 1 Apr 2026 16:17:16 +0100 Subject: [PATCH 06/12] Fix Python line length and .NET partial class for source gen - Python: split long string on line 54 to stay under 100 char limit - .NET: mark ToolResultsTests as partial so the nested ToolResultsJsonContext source generator can emit code Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- dotnet/test/ToolResultsTests.cs | 2 +- python/e2e/test_tool_results.py | 3 ++- ...telemetry_and_not_stringify_structured_results_for_llm.yaml | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/dotnet/test/ToolResultsTests.cs b/dotnet/test/ToolResultsTests.cs index 5459f6527..0fc36557c 100644 --- a/dotnet/test/ToolResultsTests.cs +++ b/dotnet/test/ToolResultsTests.cs @@ -12,7 +12,7 @@ namespace GitHub.Copilot.SDK.Test; -public class ToolResultsTests(E2ETestFixture fixture, ITestOutputHelper output) : E2ETestBase(fixture, "tool_results", output) +public partial class ToolResultsTests(E2ETestFixture fixture, ITestOutputHelper output) : E2ETestBase(fixture, "tool_results", output) { [JsonSourceGenerationOptions(JsonSerializerDefaults.Web)] [JsonSerializable(typeof(ToolResultAIContent))] diff --git a/python/e2e/test_tool_results.py b/python/e2e/test_tool_results.py index 564172f0e..516af8859 100644 --- a/python/e2e/test_tool_results.py +++ b/python/e2e/test_tool_results.py @@ -51,7 +51,8 @@ def check_status(invocation: ToolInvocation) -> ToolResult: ) answer = await session.send_and_wait( - "Check the status of the service using check_status. If it fails, say 'service is down'." + "Check the status of the service using check_status." + " If it fails, say 'service is down'." ) assert answer is not None assert "service is down" in answer.data.content.lower() diff --git a/test/snapshots/tool_results/should_preserve_tooltelemetry_and_not_stringify_structured_results_for_llm.yaml b/test/snapshots/tool_results/should_preserve_tooltelemetry_and_not_stringify_structured_results_for_llm.yaml index 0e0158701..71021d3b8 100644 --- a/test/snapshots/tool_results/should_preserve_tooltelemetry_and_not_stringify_structured_results_for_llm.yaml +++ b/test/snapshots/tool_results/should_preserve_tooltelemetry_and_not_stringify_structured_results_for_llm.yaml @@ -17,4 +17,4 @@ conversations: tool_call_id: toolcall_0 content: "Analysis of main.ts: no issues found" - role: assistant - content: "The analysis of main.ts is complete -- no issues were found." + content: The analysis of main.ts is complete -- no issues were found. From 5a8206c1355be4d08d5ff3b6ccf9b112db30846a Mon Sep 17 00:00:00 2001 From: Andy Adams-Moran Date: Wed, 1 Apr 2026 16:31:07 +0100 Subject: [PATCH 07/12] Fix Python SDK to always send structured tool results Remove the failure special-case that sent only the error string for result_type='failure'. Now the Python SDK always sends the full ResultResult struct (including error, resultType, toolTelemetry), consistent with Node, Go, and .NET SDKs. This fixes the e2e test snapshot mismatch: the shared YAML snapshots expect the CLI to receive a structured result (which it formats as 'Failed to execute ... due to error: Error: Tool execution failed'), but the old Python path sent only the error string, producing a different message format that the replay proxy couldn't match. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- python/copilot/session.py | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/python/copilot/session.py b/python/copilot/session.py index 019436f7a..76979e3fb 100644 --- a/python/copilot/session.py +++ b/python/copilot/session.py @@ -944,27 +944,17 @@ async def _execute_tool_and_respond( else: tool_result = result # type: ignore[assignment] - # If the tool reported a failure with an error message, send it via the - # top-level error param so the server formats the tool message consistently - # with other SDKs (e.g., "Failed to execute 'tool' ... due to error: ..."). - if tool_result.result_type == "failure" and tool_result.error: - await self.rpc.tools.handle_pending_tool_call( - SessionToolsHandlePendingToolCallParams( - request_id=request_id, + await self.rpc.tools.handle_pending_tool_call( + SessionToolsHandlePendingToolCallParams( + request_id=request_id, + result=ResultResult( + text_result_for_llm=tool_result.text_result_for_llm, + result_type=tool_result.result_type, error=tool_result.error, - ) - ) - else: - await self.rpc.tools.handle_pending_tool_call( - SessionToolsHandlePendingToolCallParams( - request_id=request_id, - result=ResultResult( - text_result_for_llm=tool_result.text_result_for_llm, - result_type=tool_result.result_type, - tool_telemetry=tool_result.tool_telemetry, - ), - ) + tool_telemetry=tool_result.tool_telemetry, + ), ) + ) except Exception as exc: try: await self.rpc.tools.handle_pending_tool_call( From 97001090b9285be7bd6d8aebe4b51d21e89c5b1c Mon Sep 17 00:00:00 2001 From: Andy Adams-Moran Date: Wed, 1 Apr 2026 16:44:03 +0100 Subject: [PATCH 08/12] Distinguish exception-originated failures from deliberate ones The previous commit broke test_handles_tool_calling_errors because define_tool's exception handler wraps exceptions as ToolResult(failure), which was then sent as a structured result instead of a top-level error. Fix: introduce TOOL_EXCEPTION_TEXT constant shared between define_tool's exception handler and _execute_tool_and_respond. When the failure's textResultForLlm matches the known exception wrapper message, send via the top-level error param (CLI formats as 'Failed to execute...'); otherwise send the full structured result to preserve metadata. This preserves backward compatibility for thrown exceptions while allowing user-returned ToolResult(failure) to carry toolTelemetry. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- python/copilot/session.py | 36 ++++++++++++++++++++++++++---------- python/copilot/tools.py | 8 ++++++-- 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/python/copilot/session.py b/python/copilot/session.py index 76979e3fb..2e11124ca 100644 --- a/python/copilot/session.py +++ b/python/copilot/session.py @@ -38,7 +38,7 @@ SessionEventType, session_event_from_dict, ) -from .tools import Tool, ToolHandler, ToolInvocation, ToolResult +from .tools import TOOL_EXCEPTION_TEXT, Tool, ToolHandler, ToolInvocation, ToolResult # Re-export SessionEvent under an alias used internally SessionEventTypeAlias = SessionEvent @@ -944,17 +944,33 @@ async def _execute_tool_and_respond( else: tool_result = result # type: ignore[assignment] - await self.rpc.tools.handle_pending_tool_call( - SessionToolsHandlePendingToolCallParams( - request_id=request_id, - result=ResultResult( - text_result_for_llm=tool_result.text_result_for_llm, - result_type=tool_result.result_type, + # Exception-originated failures (from define_tool's exception handler) are + # sent via the top-level error param so the CLI formats them with its + # standard "Failed to execute..." message. Deliberate user-returned + # failures send the full structured result to preserve metadata. + if ( + tool_result.result_type == "failure" + and tool_result.error + and tool_result.text_result_for_llm == TOOL_EXCEPTION_TEXT + ): + await self.rpc.tools.handle_pending_tool_call( + SessionToolsHandlePendingToolCallParams( + request_id=request_id, error=tool_result.error, - tool_telemetry=tool_result.tool_telemetry, - ), + ) + ) + else: + await self.rpc.tools.handle_pending_tool_call( + SessionToolsHandlePendingToolCallParams( + request_id=request_id, + result=ResultResult( + text_result_for_llm=tool_result.text_result_for_llm, + result_type=tool_result.result_type, + error=tool_result.error, + tool_telemetry=tool_result.tool_telemetry, + ), + ) ) - ) except Exception as exc: try: await self.rpc.tools.handle_pending_tool_call( diff --git a/python/copilot/tools.py b/python/copilot/tools.py index f559cfefe..c7bd836b7 100644 --- a/python/copilot/tools.py +++ b/python/copilot/tools.py @@ -17,6 +17,11 @@ ToolResultType = Literal["success", "failure", "rejected", "denied"] +# Constant used by define_tool's exception handler so that +# _execute_tool_and_respond can detect exception-originated failures +# and send them via the top-level error param (matching CLI formatting). +TOOL_EXCEPTION_TEXT = "Invoking this tool produced an error. Detailed information is not available." + @dataclass class ToolBinaryResult: @@ -195,8 +200,7 @@ async def wrapped_handler(invocation: ToolInvocation) -> ToolResult: # Don't expose detailed error information to the LLM for security reasons. # The actual error is stored in the 'error' field for debugging. return ToolResult( - text_result_for_llm="Invoking this tool produced an error. " - "Detailed information is not available.", + text_result_for_llm=TOOL_EXCEPTION_TEXT, result_type="failure", error=str(exc), tool_telemetry={}, From caac4254969afc965bff9d27b58b69488dd16aef Mon Sep 17 00:00:00 2001 From: Andy Adams-Moran Date: Wed, 1 Apr 2026 17:19:28 +0100 Subject: [PATCH 09/12] Address code review: tighter guards, default ResultType, session cleanup - Node: validate resultType against allowed values in isToolResultObject - Go: default empty ResultType to 'success' (or 'failure' when error set) - Python: use _from_exception attribute instead of sentinel string match - Python/Go: disconnect sessions in e2e tests to avoid leaking state Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- go/internal/e2e/tool_results_test.go | 12 ++++++ go/session.go | 14 +++++-- nodejs/src/session.ts | 30 ++++++++++----- python/copilot/session.py | 8 +--- python/copilot/tools.py | 17 +++++---- python/e2e/test_tool_results.py | 57 +++++++++++++++++----------- 6 files changed, 90 insertions(+), 48 deletions(-) diff --git a/go/internal/e2e/tool_results_test.go b/go/internal/e2e/tool_results_test.go index cc21e30b6..b35d9b5d0 100644 --- a/go/internal/e2e/tool_results_test.go +++ b/go/internal/e2e/tool_results_test.go @@ -53,6 +53,10 @@ func TestToolResults(t *testing.T) { if !strings.Contains(strings.ToLower(content), "sunny") && !strings.Contains(content, "72") { t.Errorf("Expected answer to mention sunny or 72, got %q", content) } + + if err := session.Disconnect(); err != nil { + t.Errorf("Failed to disconnect session: %v", err) + } }) t.Run("should handle tool result with failure resulttype", func(t *testing.T) { @@ -97,6 +101,10 @@ func TestToolResults(t *testing.T) { if !strings.Contains(strings.ToLower(content), "service is down") { t.Errorf("Expected 'service is down', got %q", content) } + + if err := session.Disconnect(); err != nil { + t.Errorf("Failed to disconnect session: %v", err) + } }) t.Run("should preserve tooltelemetry and not stringify structured results for llm", func(t *testing.T) { @@ -167,5 +175,9 @@ func TestToolResults(t *testing.T) { if strings.Contains(toolResults[0].Content, "resultType") { t.Error("Tool result content should not contain 'resultType'") } + + if err := session.Disconnect(); err != nil { + t.Errorf("Failed to disconnect session: %v", err) + } }) } diff --git a/go/session.go b/go/session.go index 49a4b1a63..cf970450d 100644 --- a/go/session.go +++ b/go/session.go @@ -625,15 +625,23 @@ func (s *Session) executeToolAndRespond(requestID, toolName, toolCallID string, textResultForLLM = fmt.Sprintf("%v", result) } + // Default ResultType to "success" when unset, or "failure" when there's an error. + effectiveResultType := result.ResultType + if effectiveResultType == "" { + if result.Error != "" { + effectiveResultType = "failure" + } else { + effectiveResultType = "success" + } + } + rpcResult := rpc.ResultUnion{ ResultResult: &rpc.ResultResult{ TextResultForLlm: textResultForLLM, ToolTelemetry: result.ToolTelemetry, + ResultType: &effectiveResultType, }, } - if result.ResultType != "" { - rpcResult.ResultResult.ResultType = &result.ResultType - } if result.Error != "" { rpcResult.ResultResult.Error = &result.Error } diff --git a/nodejs/src/session.ts b/nodejs/src/session.ts index e6a51c0a3..c2af3f8d4 100644 --- a/nodejs/src/session.ts +++ b/nodejs/src/session.ts @@ -1050,15 +1050,27 @@ export class CopilotSession { /** * Type guard that checks whether a value is a {@link ToolResultObject}. - * A valid object must have a string `textResultForLlm` and a string `resultType`. + * A valid object must have a string `textResultForLlm` and a recognized `resultType`. */ function isToolResultObject(value: unknown): value is ToolResultObject { - return ( - typeof value === "object" && - value !== null && - "textResultForLlm" in value && - typeof (value as ToolResultObject).textResultForLlm === "string" && - "resultType" in value && - typeof (value as ToolResultObject).resultType === "string" - ); + if (typeof value !== "object" || value === null) { + return false; + } + + if (!("textResultForLlm" in value) || typeof (value as ToolResultObject).textResultForLlm !== "string") { + return false; + } + + if (!("resultType" in value) || typeof (value as ToolResultObject).resultType !== "string") { + return false; + } + + const allowedResultTypes: Array = [ + "success", + "failure", + "rejected", + "denied", + ]; + + return allowedResultTypes.includes((value as ToolResultObject).resultType); } diff --git a/python/copilot/session.py b/python/copilot/session.py index 2e11124ca..4bfbe1964 100644 --- a/python/copilot/session.py +++ b/python/copilot/session.py @@ -38,7 +38,7 @@ SessionEventType, session_event_from_dict, ) -from .tools import TOOL_EXCEPTION_TEXT, Tool, ToolHandler, ToolInvocation, ToolResult +from .tools import Tool, ToolHandler, ToolInvocation, ToolResult # Re-export SessionEvent under an alias used internally SessionEventTypeAlias = SessionEvent @@ -948,11 +948,7 @@ async def _execute_tool_and_respond( # sent via the top-level error param so the CLI formats them with its # standard "Failed to execute..." message. Deliberate user-returned # failures send the full structured result to preserve metadata. - if ( - tool_result.result_type == "failure" - and tool_result.error - and tool_result.text_result_for_llm == TOOL_EXCEPTION_TEXT - ): + if getattr(tool_result, "_from_exception", False): await self.rpc.tools.handle_pending_tool_call( SessionToolsHandlePendingToolCallParams( request_id=request_id, diff --git a/python/copilot/tools.py b/python/copilot/tools.py index c7bd836b7..bdb993c62 100644 --- a/python/copilot/tools.py +++ b/python/copilot/tools.py @@ -17,11 +17,6 @@ ToolResultType = Literal["success", "failure", "rejected", "denied"] -# Constant used by define_tool's exception handler so that -# _execute_tool_and_respond can detect exception-originated failures -# and send them via the top-level error param (matching CLI formatting). -TOOL_EXCEPTION_TEXT = "Invoking this tool produced an error. Detailed information is not available." - @dataclass class ToolBinaryResult: @@ -199,12 +194,20 @@ async def wrapped_handler(invocation: ToolInvocation) -> ToolResult: except Exception as exc: # Don't expose detailed error information to the LLM for security reasons. # The actual error is stored in the 'error' field for debugging. - return ToolResult( - text_result_for_llm=TOOL_EXCEPTION_TEXT, + tr = ToolResult( + text_result_for_llm=( + "Invoking this tool produced an error. " + "Detailed information is not available." + ), result_type="failure", error=str(exc), tool_telemetry={}, ) + # Mark as exception-originated so _execute_tool_and_respond + # sends it via the top-level error param (matching CLI formatting) + # rather than as a structured result. + tr._from_exception = True # type: ignore[attr-defined] + return tr return Tool( name=tool_name, diff --git a/python/e2e/test_tool_results.py b/python/e2e/test_tool_results.py index 516af8859..d08a62191 100644 --- a/python/e2e/test_tool_results.py +++ b/python/e2e/test_tool_results.py @@ -30,12 +30,15 @@ def get_weather(params: WeatherParams, invocation: ToolInvocation) -> ToolResult on_permission_request=PermissionHandler.approve_all, tools=[get_weather] ) - await session.send("What's the weather in Paris?") - assistant_message = await get_final_assistant_message(session) - assert ( - "sunny" in assistant_message.data.content.lower() - or "72" in assistant_message.data.content - ) + try: + await session.send("What's the weather in Paris?") + assistant_message = await get_final_assistant_message(session) + assert ( + "sunny" in assistant_message.data.content.lower() + or "72" in assistant_message.data.content + ) + finally: + await session.disconnect() async def test_should_handle_tool_result_with_failure_resulttype(self, ctx: E2ETestContext): @define_tool("check_status", description="Checks the status of a service") @@ -50,12 +53,15 @@ def check_status(invocation: ToolInvocation) -> ToolResult: on_permission_request=PermissionHandler.approve_all, tools=[check_status] ) - answer = await session.send_and_wait( - "Check the status of the service using check_status." - " If it fails, say 'service is down'." - ) - assert answer is not None - assert "service is down" in answer.data.content.lower() + try: + answer = await session.send_and_wait( + "Check the status of the service using check_status." + " If it fails, say 'service is down'." + ) + assert answer is not None + assert "service is down" in answer.data.content.lower() + finally: + await session.disconnect() async def test_should_preserve_tooltelemetry_and_not_stringify_structured_results_for_llm( self, ctx: E2ETestContext @@ -78,14 +84,19 @@ def analyze_code(params: AnalyzeParams, invocation: ToolInvocation) -> ToolResul on_permission_request=PermissionHandler.approve_all, tools=[analyze_code] ) - await session.send("Analyze the file main.ts for issues.") - assistant_message = await get_final_assistant_message(session) - assert "no issues" in assistant_message.data.content.lower() - - # Verify the LLM received just textResultForLlm, not stringified JSON - traffic = await ctx.get_exchanges() - last_conversation = traffic[-1] - tool_results = [m for m in last_conversation["request"]["messages"] if m["role"] == "tool"] - assert len(tool_results) == 1 - assert "toolTelemetry" not in tool_results[0]["content"] - assert "resultType" not in tool_results[0]["content"] + try: + await session.send("Analyze the file main.ts for issues.") + assistant_message = await get_final_assistant_message(session) + assert "no issues" in assistant_message.data.content.lower() + + # Verify the LLM received just textResultForLlm, not stringified JSON + traffic = await ctx.get_exchanges() + last_conversation = traffic[-1] + tool_results = [ + m for m in last_conversation["request"]["messages"] if m["role"] == "tool" + ] + assert len(tool_results) == 1 + assert "toolTelemetry" not in tool_results[0]["content"] + assert "resultType" not in tool_results[0]["content"] + finally: + await session.disconnect() From 0aaf09406145e980041d9aa6ed00bf929dfe2cdc Mon Sep 17 00:00:00 2001 From: Andy Adams-Moran Date: Wed, 1 Apr 2026 17:26:28 +0100 Subject: [PATCH 10/12] Add 'timeout' to ToolResultType in Node and Python The agent runtime recently added 'timeout' as a fifth valid resultType. Update the type guard and type definitions to match. --- nodejs/src/session.ts | 1 + nodejs/src/types.ts | 2 +- python/copilot/tools.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/nodejs/src/session.ts b/nodejs/src/session.ts index c2af3f8d4..6df938773 100644 --- a/nodejs/src/session.ts +++ b/nodejs/src/session.ts @@ -1070,6 +1070,7 @@ function isToolResultObject(value: unknown): value is ToolResultObject { "failure", "rejected", "denied", + "timeout", ]; return allowedResultTypes.includes((value as ToolResultObject).resultType); diff --git a/nodejs/src/types.ts b/nodejs/src/types.ts index b4b9e563c..ba6a7ed6d 100644 --- a/nodejs/src/types.ts +++ b/nodejs/src/types.ts @@ -176,7 +176,7 @@ export interface CopilotClientOptions { /** * Configuration for creating a session */ -export type ToolResultType = "success" | "failure" | "rejected" | "denied"; +export type ToolResultType = "success" | "failure" | "rejected" | "denied" | "timeout"; export type ToolBinaryResult = { data: string; diff --git a/python/copilot/tools.py b/python/copilot/tools.py index bdb993c62..d6142788f 100644 --- a/python/copilot/tools.py +++ b/python/copilot/tools.py @@ -15,7 +15,7 @@ from pydantic import BaseModel -ToolResultType = Literal["success", "failure", "rejected", "denied"] +ToolResultType = Literal["success", "failure", "rejected", "denied", "timeout"] @dataclass From f84feece1fe730c9a7d567b59d867408fb0742ef Mon Sep 17 00:00:00 2001 From: Andy Adams-Moran Date: Wed, 1 Apr 2026 17:31:44 +0100 Subject: [PATCH 11/12] Fix prettier formatting in session.ts --- nodejs/src/session.ts | 5 ++++- .../session_config/should_accept_blob_attachments.yaml | 1 - .../session_config/should_accept_message_attachments.yaml | 4 ++++ .../should_support_multiple_concurrent_sessions.yaml | 8 ++++---- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/nodejs/src/session.ts b/nodejs/src/session.ts index 6df938773..09b5ca92f 100644 --- a/nodejs/src/session.ts +++ b/nodejs/src/session.ts @@ -1057,7 +1057,10 @@ function isToolResultObject(value: unknown): value is ToolResultObject { return false; } - if (!("textResultForLlm" in value) || typeof (value as ToolResultObject).textResultForLlm !== "string") { + if ( + !("textResultForLlm" in value) || + typeof (value as ToolResultObject).textResultForLlm !== "string" + ) { return false; } diff --git a/test/snapshots/session_config/should_accept_blob_attachments.yaml b/test/snapshots/session_config/should_accept_blob_attachments.yaml index 89e5d47ed..804775557 100644 --- a/test/snapshots/session_config/should_accept_blob_attachments.yaml +++ b/test/snapshots/session_config/should_accept_blob_attachments.yaml @@ -5,4 +5,3 @@ conversations: - role: system content: ${system} - role: user - content: Describe this image diff --git a/test/snapshots/session_config/should_accept_message_attachments.yaml b/test/snapshots/session_config/should_accept_message_attachments.yaml index 3ea9f830a..7d47efce7 100644 --- a/test/snapshots/session_config/should_accept_message_attachments.yaml +++ b/test/snapshots/session_config/should_accept_message_attachments.yaml @@ -1,6 +1,10 @@ models: - claude-sonnet-4.5 conversations: + - messages: + - role: system + content: ${system} + - role: user - messages: - role: system content: ${system} diff --git a/test/snapshots/session_lifecycle/should_support_multiple_concurrent_sessions.yaml b/test/snapshots/session_lifecycle/should_support_multiple_concurrent_sessions.yaml index cf55fcc17..fdb7ebca0 100644 --- a/test/snapshots/session_lifecycle/should_support_multiple_concurrent_sessions.yaml +++ b/test/snapshots/session_lifecycle/should_support_multiple_concurrent_sessions.yaml @@ -5,13 +5,13 @@ conversations: - role: system content: ${system} - role: user - content: What is 3+3? Reply with just the number. + content: What is 1+1? Reply with just the number. - role: assistant - content: "6" + content: "2" - messages: - role: system content: ${system} - role: user - content: What is 1+1? Reply with just the number. + content: What is 3+3? Reply with just the number. - role: assistant - content: "2" + content: "6" From dcfd60738818434b03e439a4630e850e4e13d880 Mon Sep 17 00:00:00 2001 From: Andy Adams-Moran Date: Wed, 1 Apr 2026 17:55:55 +0100 Subject: [PATCH 12/12] Make _from_exception a typed dataclass field; revert accidental snapshot changes - Python: replace dynamic attribute with proper dataclass field (field(default=False, repr=False)) so type-checkers can see it - Revert accidentally committed snapshot modifications for blob_attachments and message_attachments tests --- python/copilot/session.py | 2 +- python/copilot/tools.py | 11 ++++------- .../should_accept_blob_attachments.yaml | 1 + .../should_accept_message_attachments.yaml | 4 ---- 4 files changed, 6 insertions(+), 12 deletions(-) diff --git a/python/copilot/session.py b/python/copilot/session.py index 4bfbe1964..c4feb82de 100644 --- a/python/copilot/session.py +++ b/python/copilot/session.py @@ -948,7 +948,7 @@ async def _execute_tool_and_respond( # sent via the top-level error param so the CLI formats them with its # standard "Failed to execute..." message. Deliberate user-returned # failures send the full structured result to preserve metadata. - if getattr(tool_result, "_from_exception", False): + if tool_result._from_exception: await self.rpc.tools.handle_pending_tool_call( SessionToolsHandlePendingToolCallParams( request_id=request_id, diff --git a/python/copilot/tools.py b/python/copilot/tools.py index d6142788f..66c660536 100644 --- a/python/copilot/tools.py +++ b/python/copilot/tools.py @@ -10,7 +10,7 @@ import inspect import json from collections.abc import Awaitable, Callable -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Any, Literal, TypeVar, get_type_hints, overload from pydantic import BaseModel @@ -38,6 +38,7 @@ class ToolResult: binary_results_for_llm: list[ToolBinaryResult] | None = None session_log: str | None = None tool_telemetry: dict[str, Any] | None = None + _from_exception: bool = field(default=False, repr=False) @dataclass @@ -194,7 +195,7 @@ async def wrapped_handler(invocation: ToolInvocation) -> ToolResult: except Exception as exc: # Don't expose detailed error information to the LLM for security reasons. # The actual error is stored in the 'error' field for debugging. - tr = ToolResult( + return ToolResult( text_result_for_llm=( "Invoking this tool produced an error. " "Detailed information is not available." @@ -202,12 +203,8 @@ async def wrapped_handler(invocation: ToolInvocation) -> ToolResult: result_type="failure", error=str(exc), tool_telemetry={}, + _from_exception=True, ) - # Mark as exception-originated so _execute_tool_and_respond - # sends it via the top-level error param (matching CLI formatting) - # rather than as a structured result. - tr._from_exception = True # type: ignore[attr-defined] - return tr return Tool( name=tool_name, diff --git a/test/snapshots/session_config/should_accept_blob_attachments.yaml b/test/snapshots/session_config/should_accept_blob_attachments.yaml index 804775557..89e5d47ed 100644 --- a/test/snapshots/session_config/should_accept_blob_attachments.yaml +++ b/test/snapshots/session_config/should_accept_blob_attachments.yaml @@ -5,3 +5,4 @@ conversations: - role: system content: ${system} - role: user + content: Describe this image diff --git a/test/snapshots/session_config/should_accept_message_attachments.yaml b/test/snapshots/session_config/should_accept_message_attachments.yaml index 7d47efce7..3ea9f830a 100644 --- a/test/snapshots/session_config/should_accept_message_attachments.yaml +++ b/test/snapshots/session_config/should_accept_message_attachments.yaml @@ -1,10 +1,6 @@ models: - claude-sonnet-4.5 conversations: - - messages: - - role: system - content: ${system} - - role: user - messages: - role: system content: ${system}