forked from github/copilot-cli
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest-visual-analysis-bounds.js
More file actions
166 lines (153 loc) · 6.78 KB
/
test-visual-analysis-bounds.js
File metadata and controls
166 lines (153 loc) · 6.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
#!/usr/bin/env node
const assert = require('assert');
const path = require('path');
const { createMessageBuilder } = require(path.join(__dirname, '..', 'src', 'main', 'ai-service', 'message-builder.js'));
/**
 * Creates a message builder whose dependencies are all fixed test stubs.
 *
 * Only the three fixture values vary between callers; every other dependency
 * handed to `createMessageBuilder` returns a constant empty/disabled value.
 *
 * @param {object} [fixtures] - Optional fixture values (defaults to `{}`).
 * @param {*} [fixtures.latestVisual] - Returned by `getLatestVisualContext`; any falsy value becomes `null`.
 * @param {*} [fixtures.foreground] - Resolved by `getForegroundWindowInfo`; any falsy value becomes `null`.
 * @param {*} [fixtures.watcherSnapshot] - Returned by the UI watcher's `getCapabilitySnapshot`; any falsy value becomes `null`.
 * @returns {*} Whatever `createMessageBuilder` returns for these dependencies.
 */
function createBuilder({ latestVisual, foreground, watcherSnapshot } = {}) {
  // The fixtures are never reassigned, so the null-normalisation can be done once up front.
  const visualContext = latestVisual || null;
  const foregroundInfo = foreground || null;
  const capabilitySnapshot = watcherSnapshot || null;

  // Each getter hands out a fresh object on every call.
  const makeInspectService = () => ({ isInspectModeActive: () => false });
  const makeUIWatcher = () => ({
    isPolling: false,
    getCapabilitySnapshot: () => capabilitySnapshot,
    getContextForAI: () => ''
  });

  const dependencies = {
    getBrowserSessionState: () => ({ lastUpdated: null }),
    getCurrentProvider: () => 'copilot',
    getForegroundWindowInfo: async () => foregroundInfo,
    getInspectService: makeInspectService,
    getLatestVisualContext: () => visualContext,
    getPreferencesSystemContext: () => '',
    getPreferencesSystemContextForApp: () => '',
    getRecentConversationHistory: () => [],
    getSemanticDOMContextText: () => '',
    getUIWatcher: makeUIWatcher,
    maxHistory: 0,
    systemPrompt: 'base system prompt'
  };

  return createMessageBuilder(dependencies);
}
/**
 * Runs a single named test case and reports the outcome on the console.
 *
 * On success prints `PASS <name>`. On failure prints `FAIL <name>` followed by
 * the failure details and sets `process.exitCode` to 1; the failure is not
 * rethrown, so the caller can continue with further cases.
 *
 * @param {string} name - Label printed next to PASS/FAIL.
 * @param {Function} fn - Test body; may be synchronous or return a promise.
 * @returns {Promise<void>} Resolves once the outcome has been reported.
 */
async function test(name, fn) {
  try {
    await fn();
    console.log(`PASS ${name}`);
  } catch (error) {
    console.error(`FAIL ${name}`);
    // A test body may throw a non-Error value (string, null, undefined).
    // Reading `.stack` off null/undefined would throw inside this handler and
    // reject the whole helper, so fall back to the string form of the value.
    const details = (error && (error.stack || error.message)) || String(error);
    console.error(details);
    process.exitCode = 1;
  }
}
/**
 * Builds the message list for `userMessage` with a stubbed builder and returns
 * the first system message containing the visual evidence bounds heading.
 *
 * @param {object} options
 * @param {*} options.latestVisual - Forwarded to `createBuilder`.
 * @param {*} options.foreground - Forwarded to `createBuilder`.
 * @param {*} options.watcherSnapshot - Forwarded to `createBuilder`.
 * @param {*} options.userMessage - First argument to `buildMessages`.
 * @returns {Promise<object|undefined>} The matching message, or `undefined` when none matches.
 */
async function buildVisualEvidenceMessage({ latestVisual, foreground, watcherSnapshot, userMessage }) {
  const heading = '## Current Visual Evidence Bounds';
  // Role is checked first so non-system entries never have `content` touched.
  const isVisualBoundsMessage = (entry) => entry.role === 'system' && entry.content.includes(heading);

  const builderFixtures = { latestVisual, foreground, watcherSnapshot };
  // NOTE(review): the second argument `true` presumably enables visual context — confirm against message-builder.
  const builtMessages = await createBuilder(builderFixtures).buildMessages(userMessage, true);
  return builtMessages.find(isVisualBoundsMessage);
}
/**
 * Builds the message list for `userMessage` with a stubbed builder and returns
 * the first system message containing the drawing capability bounds heading.
 *
 * @param {object} options
 * @param {*} options.latestVisual - Forwarded to `createBuilder`.
 * @param {*} options.foreground - Forwarded to `createBuilder`.
 * @param {*} options.watcherSnapshot - Forwarded to `createBuilder`.
 * @param {*} options.userMessage - First argument to `buildMessages`.
 * @returns {Promise<object|undefined>} The matching message, or `undefined` when none matches.
 */
async function buildDrawingEvidenceMessage({ latestVisual, foreground, watcherSnapshot, userMessage }) {
  const heading = '## Drawing Capability Bounds';
  // Role is checked first so non-system entries never have `content` touched.
  const isDrawingBoundsMessage = (entry) => entry.role === 'system' && entry.content.includes(heading);

  const builderFixtures = { latestVisual, foreground, watcherSnapshot };
  // NOTE(review): the second argument `true` presumably enables visual context — confirm against message-builder.
  const builtMessages = await createBuilder(builderFixtures).buildMessages(userMessage, true);
  return builtMessages.find(isDrawingBoundsMessage);
}
/**
 * Runs the three visual-analysis bounds test cases one after another.
 *
 * Each case builds a prompt through the stubbed message builder for a
 * TradingView foreground window and asserts that the injected system block
 * carries the expected capture metadata and rules.
 *
 * @returns {Promise<void>} Resolves after all cases have been run.
 */
async function main() {
  // Fixture factories: every call returns a fresh object, so no test case
  // shares a mutable fixture with another.
  const makeCapture = (captureMode, scope, captureTrusted) => ({
    dataURL: 'data:image/png;base64,AAAA',
    captureMode,
    captureTrusted,
    scope
  });
  const makeForeground = () => ({
    success: true,
    processName: 'tradingview',
    title: 'TradingView - LUNR'
  });
  const makeWatcherSnapshot = () => ({
    activeWindowElementCount: 4,
    interactiveElementCount: 2,
    namedInteractiveElementCount: 1,
    activeWindow: {
      processName: 'tradingview',
      title: 'TradingView - LUNR'
    }
  });
  // True when `text` contains at least one of the candidate fragments.
  const includesAny = (text, candidates) => candidates.some((candidate) => text.includes(candidate));

  await test('degraded TradingView analysis prompt forbids precise unseen indicator claims', async () => {
    const visualMessage = await buildVisualEvidenceMessage({
      latestVisual: makeCapture('screen-copyfromscreen', 'screen', false),
      foreground: makeForeground(),
      watcherSnapshot: makeWatcherSnapshot(),
      userMessage: 'give me your synthesis of LUNR in tradingview'
    });

    assert(visualMessage, 'visual evidence block should be injected');
    const { content } = visualMessage;
    assert(content.includes('captureMode: screen-copyfromscreen'));
    assert(content.includes('captureTrusted: no'));
    assert(content.includes('evidenceQuality: degraded-mixed-desktop'));
    assert(content.includes('Rule: Treat the current screenshot as degraded mixed-desktop evidence, not a trusted target-window capture.'));
    assert(content.includes('Rule: For TradingView or other low-UIA chart apps, do not claim precise indicator values, exact trendline coordinates, or exact support/resistance numbers unless they are directly legible in the screenshot or supplied by a stronger evidence path.'));
    assert(content.includes('Rule: If a detail is not directly legible, state uncertainty explicitly and offer bounded next steps.'));
  });

  await test('trusted target-window capture allows stronger direct observation wording', async () => {
    const visualMessage = await buildVisualEvidenceMessage({
      latestVisual: makeCapture('window-copyfromscreen', 'window', true),
      foreground: makeForeground(),
      watcherSnapshot: makeWatcherSnapshot(),
      userMessage: 'analyze the tradingview chart'
    });

    assert(visualMessage, 'visual evidence block should be injected');
    const { content } = visualMessage;
    assert(content.includes('captureMode: window-copyfromscreen'));
    assert(content.includes('captureTrusted: yes'));
    assert(content.includes('evidenceQuality: trusted-target-window'));
    assert(content.includes('Rule: Describe directly visible facts from the current screenshot first, then clearly separate any interpretation or trading hypothesis.'));
    assert(content.includes('Rule: Even with trusted capture, only state precise chart indicator values when they are directly legible in the screenshot or supported by a stronger evidence path.'));
  });

  await test('drawing placement requests inject explicit capability bounds', async () => {
    const drawingMessage = await buildDrawingEvidenceMessage({
      latestVisual: makeCapture('screen-copyfromscreen', 'screen', false),
      foreground: makeForeground(),
      watcherSnapshot: makeWatcherSnapshot(),
      userMessage: 'draw and place a trend line exactly on tradingview'
    });

    assert(drawingMessage, 'drawing evidence block should be injected');
    const { content } = drawingMessage;
    // Several checks accept either of two phrasings of the same rule.
    assert(includesAny(content, [
      'requestKind: placement-request',
      'requestKind: precise-placement'
    ]));
    assert(content.includes('Distinguish TradingView drawing surface access from precise chart-object placement'));
    assert(includesAny(content, [
      'Do not claim a trendline or other chart object was placed precisely',
      'Do not claim a TradingView drawing was placed precisely'
    ]));
    assert(includesAny(content, [
      'screenshot-only or degraded visual evidence',
      'explicitly refuse precise-placement claims'
    ]));
  });
}
/**
 * Last-resort handler for a rejection escaping `main()`: prints a suite-level
 * failure line plus the error details, then exits the process with status 1.
 *
 * @param {Error} error - The rejection reason from `main()`.
 */
function reportFatalError(error) {
  console.error('FAIL visual analysis bounds');
  console.error(error.stack || error.message);
  process.exit(1);
}

// Script entry point: run the suite and route any escaped rejection to the handler.
main().catch(reportFatalError);