forked from github/copilot-sdk-java
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathToolResultsTest.java
More file actions
148 lines (117 loc) · 6.24 KB
/
ToolResultsTest.java
File metadata and controls
148 lines (117 loc) · 6.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
*--------------------------------------------------------------------------------------------*/
package com.github.copilot.sdk;
import static org.junit.jupiter.api.Assertions.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeUnit;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import com.github.copilot.sdk.generated.SessionEvent;
import com.github.copilot.sdk.generated.ToolExecutionCompleteEvent;
import com.github.copilot.sdk.json.MessageOptions;
import com.github.copilot.sdk.json.PermissionHandler;
import com.github.copilot.sdk.json.SessionConfig;
import com.github.copilot.sdk.json.ToolDefinition;
import com.github.copilot.sdk.json.ToolResultObject;
/**
* E2E tests for tool result types — verifying that rejected and denied result
* types are handled correctly by the runtime.
*
* <p>
* Snapshots are stored in {@code test/snapshots/tool_results/}.
* </p>
*/
public class ToolResultsTest {

    /** Snapshot category passed to {@code configureForTest} by every test in this class. */
    private static final String SNAPSHOT_CATEGORY = "tool_results";

    /** Upper bound, in seconds, on waiting for the {@code sendAndWait} future to complete. */
    private static final long RESPONSE_TIMEOUT_SECONDS = 60;

    /** Test context shared by all tests; created in {@link #setup()} and closed in {@link #teardown()}. */
    private static E2ETestContext ctx;

    /**
     * Creates the shared test context once, before any test in this class runs.
     *
     * @throws Exception
     *             if the context cannot be created
     */
    @BeforeAll
    static void setup() throws Exception {
        ctx = E2ETestContext.create();
    }

    /**
     * Closes the shared test context after all tests have run. The null check
     * covers the case where {@link #setup()} failed before assigning it.
     *
     * @throws Exception
     *             if closing the context fails
     */
    @AfterAll
    static void teardown() throws Exception {
        if (ctx != null) {
            ctx.close();
        }
    }

    /**
     * Verifies that a tool returning a "rejected" resultType is reported as a
     * failed tool execution with the correct error code.
     *
     * <p>
     * Snapshot:
     * {@code tool_results/should_handle_tool_result_with_rejected_resulttype}
     * </p>
     *
     * @throws Exception
     *             if the session cannot be created or the response does not arrive
     *             in time
     */
    @Test
    void testShouldHandleToolResultWithRejectedResultType() throws Exception {
        assertToolResultReportedAsFailure("should_handle_tool_result_with_rejected_resulttype", "deploy_service",
                "Deploys a service", "rejected",
                "Deployment rejected: policy violation - production deployments require approval",
                "Deploy the service using deploy_service. If it's rejected, tell me it was 'rejected by policy'.");
    }

    /**
     * Verifies that a tool returning a "denied" resultType is reported as a failed
     * tool execution with the correct error code.
     *
     * <p>
     * Snapshot:
     * {@code tool_results/should_handle_tool_result_with_denied_resulttype}
     * </p>
     *
     * @throws Exception
     *             if the session cannot be created or the response does not arrive
     *             in time
     */
    @Test
    void testShouldHandleToolResultWithDeniedResultType() throws Exception {
        assertToolResultReportedAsFailure("should_handle_tool_result_with_denied_resulttype", "access_secret",
                "Accesses a secret", "denied", "Access denied: insufficient permissions to read secrets",
                "Use access_secret to get the API key. If access is denied, tell me it was 'access denied'.");
    }

    /**
     * Runs one "failing tool result" scenario end to end: registers a
     * parameterless tool whose handler returns a {@link ToolResultObject} built
     * from {@code resultType} and {@code resultMessage}, sends {@code prompt}, and
     * asserts that the handler ran and that the first
     * {@link ToolExecutionCompleteEvent} is unsuccessful with an error code equal
     * to {@code resultType}.
     *
     * @param snapshotName
     *            snapshot name passed to {@code configureForTest} under the
     *            {@code tool_results} category
     * @param toolName
     *            name the tool is registered under
     * @param toolDescription
     *            description the tool is registered with
     * @param resultType
     *            first {@link ToolResultObject} constructor argument; also the
     *            expected error code on the completion event
     * @param resultMessage
     *            second {@link ToolResultObject} constructor argument (the
     *            human-readable text returned by the tool)
     * @param prompt
     *            user prompt sent to the session
     * @throws Exception
     *             if the session cannot be created or the response does not arrive
     *             within {@link #RESPONSE_TIMEOUT_SECONDS} seconds
     */
    private static void assertToolResultReportedAsFailure(String snapshotName, String toolName,
            String toolDescription, String resultType, String resultMessage, String prompt) throws Exception {
        ctx.configureForTest(SNAPSHOT_CATEGORY, snapshotName);

        // Single-element array so the lambda below can record that it was invoked.
        var toolHandlerCalled = new boolean[]{false};

        // JSON-schema-style description of a tool that takes no arguments.
        Map<String, Object> params = Map.of("type", "object", "properties", Map.of(), "required", List.of());
        ToolDefinition tool = ToolDefinition.create(toolName, toolDescription, params, (invocation) -> {
            toolHandlerCalled[0] = true;
            return CompletableFuture
                    .completedFuture(new ToolResultObject(resultType, resultMessage, null, null, null, null));
        });

        try (CopilotClient client = ctx.createClient()) {
            CopilotSession session = client.createSession(
                    new SessionConfig().setTools(List.of(tool)).setOnPermissionRequest(PermissionHandler.APPROVE_ALL))
                    .get();
            // Close in finally so a failed assertion or a timeout below does not
            // leave the session open against the shared context.
            try {
                List<SessionEvent> events = new ArrayList<>();
                session.on(events::add);

                session.sendAndWait(new MessageOptions().setPrompt(prompt)).get(RESPONSE_TIMEOUT_SECONDS,
                        TimeUnit.SECONDS);

                assertTrue(toolHandlerCalled[0], "Tool handler should have been called");

                List<ToolExecutionCompleteEvent> toolEvents = events.stream()
                        .filter(ToolExecutionCompleteEvent.class::isInstance)
                        .map(ToolExecutionCompleteEvent.class::cast).toList();
                assertFalse(toolEvents.isEmpty(), "Should have a tool.execution_complete event");

                ToolExecutionCompleteEvent toolEvt = toolEvents.get(0);
                assertFalse(toolEvt.getData().success(), "Tool execution should not be marked as successful");
                assertNotNull(toolEvt.getData().error(), "Should have error details");
                assertEquals(resultType, toolEvt.getData().error().code(),
                        "Error code should be '" + resultType + "'");
            } finally {
                session.close();
            }
        }
    }
}