Skip to content

Commit b3ff811

Browse files
committed
Prove chat continuity and extract TradingView helpers
1 parent 5d32615 commit b3ff811

16 files changed

+2384
-455
lines changed

docs/CHAT_CONTINUITY_IMPLEMENTATION_PLAN.md

Lines changed: 551 additions & 10 deletions
Large diffs are not rendered by default.

scripts/test-bug-fixes.js

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -228,34 +228,40 @@ test('rewriteActionsForReliability normalizes typoed app launches', () => {
228228

229229
test('ai-service normalizes app identity for learned skill scope', () => {
230230
const aiServicePath = path.join(__dirname, '..', 'src', 'main', 'ai-service.js');
231+
const appProfilePath = path.join(__dirname, '..', 'src', 'main', 'tradingview', 'app-profile.js');
231232
const fs = require('fs');
232233

233234
const aiServiceContent = fs.readFileSync(aiServicePath, 'utf8');
235+
const appProfileContent = fs.readFileSync(appProfilePath, 'utf8');
234236

235-
assert(aiServiceContent.includes('resolveNormalizedAppIdentity('), 'ai-service should define normalized app identity resolution');
236-
assert(aiServiceContent.includes("'tradeing view'"), 'ai-service should recognize the TradingView typo alias');
237+
assert(aiServiceContent.includes("require('./tradingview/app-profile')"), 'ai-service should consume the extracted app profile module');
238+
assert(appProfileContent.includes('resolveNormalizedAppIdentity('), 'app profile module should define normalized app identity resolution');
239+
assert(appProfileContent.includes("'tradeing view'"), 'app profile module should recognize the TradingView typo alias');
237240
assert(aiServiceContent.includes('normalizedSkillApp?.processNames'), 'Learned skill scope should include normalized process names');
238241
assert(aiServiceContent.includes('normalizedSkillApp?.titleHints'), 'Learned skill scope should include normalized title hints');
239-
assert(aiServiceContent.includes('dialogTitleHints'), 'TradingView app profile should include dialog title hints');
240-
assert(aiServiceContent.includes('chartKeywords'), 'TradingView app profile should include chart-state keywords');
241-
assert(aiServiceContent.includes('drawingKeywords'), 'TradingView app profile should include drawing-tool keywords');
242-
assert(aiServiceContent.includes('pineKeywords'), 'TradingView app profile should include Pine Editor keywords');
243-
assert(aiServiceContent.includes('domKeywords'), 'TradingView app profile should include DOM keywords');
242+
assert(appProfileContent.includes('dialogTitleHints'), 'TradingView app profile should include dialog title hints');
243+
assert(appProfileContent.includes('chartKeywords'), 'TradingView app profile should include chart-state keywords');
244+
assert(appProfileContent.includes('drawingKeywords'), 'TradingView app profile should include drawing-tool keywords');
245+
assert(appProfileContent.includes('pineKeywords'), 'TradingView app profile should include Pine Editor keywords');
246+
assert(appProfileContent.includes('domKeywords'), 'TradingView app profile should include DOM keywords');
244247
});
245248

246249
test('ai-service gates TradingView follow-up typing on post-key observation checkpoints', () => {
247250
const aiServicePath = path.join(__dirname, '..', 'src', 'main', 'ai-service.js');
251+
const tradingViewVerificationPath = path.join(__dirname, '..', 'src', 'main', 'tradingview', 'verification.js');
248252
const fs = require('fs');
249253

250254
const aiServiceContent = fs.readFileSync(aiServicePath, 'utf8');
255+
const tradingViewVerificationContent = fs.readFileSync(tradingViewVerificationPath, 'utf8');
251256

252257
assert(aiServiceContent.includes('inferKeyObservationCheckpoint'), 'ai-service should infer TradingView post-key checkpoints');
253258
assert(aiServiceContent.includes('verifyKeyObservationCheckpoint'), 'ai-service should verify TradingView post-key checkpoints');
254259
assert(aiServiceContent.includes('observationCheckpoints'), 'Execution results should expose key checkpoint metadata');
255260
assert(aiServiceContent.includes('surface change before continuing'), 'Checkpoint failures should explain missing TradingView surface changes');
256-
assert(aiServiceContent.includes("classification === 'panel-open'"), 'TradingView checkpoints should recognize panel-open flows such as Pine or DOM');
257-
assert(aiServiceContent.includes('pine editor'), 'TradingView checkpoints should ground Pine Editor workflows');
258-
assert(aiServiceContent.includes('depth of market'), 'TradingView checkpoints should ground DOM workflows');
261+
assert(aiServiceContent.includes('inferTradingViewObservationSpec'), 'ai-service should consume the extracted TradingView observation-spec helper');
262+
assert(tradingViewVerificationContent.includes("classification === 'panel-open'"), 'TradingView checkpoints should recognize panel-open flows such as Pine or DOM');
263+
assert(tradingViewVerificationContent.includes('pine editor'), 'TradingView checkpoints should ground Pine Editor workflows');
264+
assert(tradingViewVerificationContent.includes('depth of market'), 'TradingView checkpoints should ground DOM workflows');
259265
});
260266

261267
test('ai-service treats TradingView DOM order-entry actions as high risk', () => {

scripts/test-chat-actionability.js

Lines changed: 148 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,53 @@ const originalLoad = Module._load;
1111
1212
let executeCount = 0;
1313
let seenMessages = [];
14+
let continuityState = process.env.__CHAT_CONTINUITY__ ? JSON.parse(process.env.__CHAT_CONTINUITY__) : null;
15+
const scriptedVisualStates = process.env.__LATEST_VISUAL_SEQUENCE__ ? JSON.parse(process.env.__LATEST_VISUAL_SEQUENCE__) : [];
16+
let lastRecordedTurn = null;
17+
18+
/**
 * Reports whether a capture-mode label denotes a whole-screen capture
 * rather than a capture of one specific window.
 *
 * The label is stringified, trimmed and lower-cased before matching, so
 * null, undefined and other falsy values behave like the empty string.
 *
 * @param {*} captureMode - Capture-mode label, e.g. 'screen-copyfromscreen'.
 * @returns {boolean} True when the normalized label is exactly 'screen',
 *   starts with 'screen-', or contains 'fullscreen'.
 */
function isScreenLikeCaptureMode(captureMode) {
  const normalized = String(captureMode || '').trim().toLowerCase();
  // The substring test also matches 'fullscreen-fallback', so that label
  // needs no dedicated comparison.
  return normalized === 'screen'
    || normalized.startsWith('screen-')
    || normalized.includes('fullscreen');
}
25+
26+
/**
 * Derives the continuity snapshot that the harness stores after a turn.
 *
 * Every field is read defensively with optional chaining, so a missing or
 * partial turn record yields nulls instead of throwing.
 *
 * degradedReason is chosen by the first matching rule:
 *   1. the turn (or its executionResult) was cancelled,
 *   2. verification status is 'contradicted',
 *   3. verification status is 'unverified',
 *   4. the capture mode is screen-like AND captureTrusted is exactly false.
 *
 * @param {object} [turnRecord] - Turn record passed to recordChatContinuityTurn.
 * @returns {{activeGoal: *, currentSubgoal: *, continuationReady: boolean,
 *   degradedReason: (string|null), lastTurn: object}} Continuity snapshot.
 */
function deriveContinuityState(turnRecord) {
  // An empty actionPlan array produces '' (not null) because join runs on it.
  const actionSummary = Array.isArray(turnRecord?.actionPlan)
    ? turnRecord.actionPlan.map((action) => action?.type).filter(Boolean).join(' -> ')
    : null;
  const verificationStatus = String(turnRecord?.verification?.status || '').trim() || null;
  const captureMode = String(turnRecord?.observationEvidence?.captureMode || '').trim() || null;
  // Only a real boolean counts; anything else means trust is unknown.
  const captureTrusted = typeof turnRecord?.observationEvidence?.captureTrusted === 'boolean'
    ? turnRecord.observationEvidence.captureTrusted
    : null;
  const wasCancelled = Boolean(turnRecord?.cancelled || turnRecord?.executionResult?.cancelled);

  let degradedReason = null;
  if (wasCancelled) {
    degradedReason = 'The last action batch was cancelled before completion.';
  } else if (verificationStatus === 'contradicted') {
    degradedReason = 'The latest evidence contradicts the claimed result.';
  } else if (verificationStatus === 'unverified') {
    degradedReason = 'The latest result is not fully verified yet.';
  } else if (isScreenLikeCaptureMode(captureMode) && captureTrusted === false) {
    degradedReason = 'Visual evidence fell back to full-screen capture instead of a trusted target-window capture.';
  }

  return {
    activeGoal: turnRecord?.activeGoal || turnRecord?.executionIntent || turnRecord?.userMessage || null,
    currentSubgoal: turnRecord?.currentSubgoal || turnRecord?.committedSubgoal || turnRecord?.thought || null,
    // A cancelled turn always sets degradedReason, so cancellation needs no
    // separate check here. A failed execution blocks continuation even
    // though it leaves degradedReason null.
    continuationReady: !degradedReason && turnRecord?.executionStatus !== 'failed',
    degradedReason,
    lastTurn: {
      actionSummary,
      nextRecommendedStep: turnRecord?.nextRecommendedStep || null,
      verificationStatus,
      captureMode,
      captureTrusted
    }
  };
}
1461
1562
const actionResponse = JSON.stringify({
1663
thought: 'Set alert in TradingView',
@@ -39,6 +86,10 @@ const aiStub = {
3986
executeCount++;
4087
return { success: true, results: [], screenshotCaptured: false, postVerification: { verified: true } };
4188
},
89+
getLatestVisualContext: () => {
90+
if (!Array.isArray(scriptedVisualStates) || scriptedVisualStates.length === 0) return null;
91+
return scriptedVisualStates[Math.max(0, executeCount - 1)] || scriptedVisualStates[scriptedVisualStates.length - 1] || null;
92+
},
4293
parsePreferenceCorrection: async () => ({ success: false, error: 'not needed' })
4394
};
4495
@@ -59,18 +110,31 @@ const preferencesStub = {
59110
mergeAppPolicy: () => ({ success: true })
60111
};
61112
113+
// In-memory replacement for the session-intent-state module: reads return the
// current continuityState, and recording a turn stores the raw record and
// replaces continuityState with the snapshot derived from it.
const sessionIntentStateStub = {
  getChatContinuityState() {
    return continuityState;
  },
  recordChatContinuityTurn(turnRecord) {
    lastRecordedTurn = turnRecord;
    continuityState = deriveContinuityState(turnRecord);
    return continuityState;
  }
};
121+
62122
// Module loader override: the listed request strings resolve to the local
// stubs; every other request is delegated to the original loader with the
// same receiver and arguments.
Module._load = function(request, parent, isMain) {
  switch (request) {
    case '../../main/ai-service':
      return aiStub;
    case '../../main/ui-watcher':
      return watcherStub;
    case '../../main/system-automation':
      return systemAutomationStub;
    case '../../main/preferences':
      return preferencesStub;
    case '../../main/session-intent-state':
      return sessionIntentStateStub;
    default:
      return originalLoad.apply(this, arguments);
  }
};
69130
70131
(async () => {
71132
const chat = require('${chatModulePath}');
72133
const result = await chat.run([], { execute: 'auto', quiet: true });
73134
console.log('EXECUTE_COUNT:' + executeCount);
135+
console.log('SEEN_MESSAGES:' + JSON.stringify(seenMessages));
136+
console.log('RECORDED_CONTINUITY:' + JSON.stringify(continuityState));
137+
console.log('LAST_TURN:' + JSON.stringify(lastRecordedTurn));
74138
process.exit(result && result.success === false ? 1 : 0);
75139
})().catch((error) => {
76140
console.error(error.stack || error.message);
@@ -79,12 +143,20 @@ Module._load = function(request, parent, isMain) {
79143
}
80144

81145
/**
 * Runs a chat scenario with no seeded continuity state and no scripted
 * visual-context sequence.
 *
 * @param {*} inputs - Forwarded unchanged to runScenarioWithContinuity.
 * @returns {Promise<*>} Whatever runScenarioWithContinuity resolves to.
 */
async function runScenario(inputs) {
  const seededContinuity = null;
  const scriptedVisualSequence = null;
  return runScenarioWithContinuity(inputs, seededContinuity, scriptedVisualSequence);
}
148+
149+
async function runScenarioWithContinuity(inputs, continuityState, latestVisualSequence) {
82150
const repoRoot = path.join(__dirname, '..');
83151
const chatModulePath = path.join(repoRoot, 'src', 'cli', 'commands', 'chat.js').replace(/\\/g, '\\\\');
84152
const child = spawn(process.execPath, ['-e', buildHarnessScript(chatModulePath)], {
85153
cwd: repoRoot,
86154
stdio: ['pipe', 'pipe', 'pipe'],
87-
env: process.env
155+
env: {
156+
...process.env,
157+
__CHAT_CONTINUITY__: continuityState ? JSON.stringify(continuityState) : '',
158+
__LATEST_VISUAL_SEQUENCE__: latestVisualSequence ? JSON.stringify(latestVisualSequence) : ''
159+
}
88160
});
89161

90162
let output = '';
@@ -123,6 +195,81 @@ async function main() {
123195
assert(continuity.output.includes('EXECUTE_COUNT:1'), 'continuity-style scenario should execute the emitted actions once');
124196
assert(!continuity.output.includes('Parsed action plan withheld'), 'continuity-style scenario should not be withheld as non-executable text');
125197

198+
const stateBackedContinuation = await runScenarioWithContinuity(['continue'], {
199+
activeGoal: 'Produce a confident synthesis of ticker LUNR in TradingView',
200+
currentSubgoal: 'Inspect the active TradingView chart',
201+
continuationReady: true,
202+
degradedReason: null,
203+
lastTurn: {
204+
actionSummary: 'focus_window -> screenshot',
205+
nextRecommendedStep: 'Continue from the latest chart evidence.'
206+
}
207+
});
208+
assert.strictEqual(stateBackedContinuation.exitCode, 0, 'state-backed continuation scenario should exit successfully');
209+
assert(stateBackedContinuation.output.includes('EXECUTE_COUNT:1'), 'state-backed continuation should execute emitted actions');
210+
assert(stateBackedContinuation.output.includes('SEEN_MESSAGES:["continue"]'), 'state-backed continuation should still send the minimal prompt while execution routing relies on saved continuity');
211+
212+
const persistedContinuation = await runScenarioWithContinuity([
213+
'help me make a confident synthesis of ticker LUNR in tradingview',
214+
'continue'
215+
], null, [{
216+
captureMode: 'window-copyfromscreen',
217+
captureTrusted: true,
218+
timestamp: 111,
219+
windowHandle: 458868,
220+
windowTitle: 'TradingView - LUNR'
221+
}]);
222+
assert.strictEqual(persistedContinuation.exitCode, 0, 'persisted continuation scenario should exit successfully');
223+
assert(persistedContinuation.output.includes('EXECUTE_COUNT:2'), 'persisted continuation should execute both the original and follow-up turn');
224+
assert(persistedContinuation.output.includes('SEEN_MESSAGES:["help me make a confident synthesis of ticker LUNR in tradingview","continue"]'), 'persisted continuation should keep the second user turn minimal while relying on recorded state');
225+
assert(/RECORDED_CONTINUITY:.*"continuationReady":true/i.test(persistedContinuation.output), 'persisted continuation should record usable continuity between turns');
226+
227+
const persistedDegradedContinuation = await runScenarioWithContinuity([
228+
'help me make a confident synthesis of ticker LUNR in tradingview',
229+
'continue'
230+
], null, [{
231+
captureMode: 'screen-copyfromscreen',
232+
captureTrusted: false,
233+
timestamp: 222,
234+
windowTitle: 'Desktop'
235+
}]);
236+
assert.strictEqual(persistedDegradedContinuation.exitCode, 0, 'persisted degraded continuation should exit successfully');
237+
assert(persistedDegradedContinuation.output.includes('EXECUTE_COUNT:1'), 'persisted degraded continuation should block the second execution');
238+
assert(/Continuity is currently degraded/i.test(persistedDegradedContinuation.output), 'persisted degraded continuation should explain degraded recovery requirements');
239+
assert(/RECORDED_CONTINUITY:.*"continuationReady":false/i.test(persistedDegradedContinuation.output), 'persisted degraded continuation should record degraded continuity after the first turn');
240+
241+
const degradedContinuation = await runScenarioWithContinuity(['continue'], {
242+
activeGoal: 'Produce a confident synthesis of ticker LUNR in TradingView',
243+
currentSubgoal: 'Inspect the active TradingView chart',
244+
continuationReady: false,
245+
degradedReason: 'Visual evidence fell back to full-screen capture instead of a trusted target-window capture.',
246+
lastTurn: {
247+
verificationStatus: 'verified',
248+
captureMode: 'screen-copyfromscreen',
249+
captureTrusted: false,
250+
nextRecommendedStep: 'Continue from the latest chart evidence.'
251+
}
252+
});
253+
assert.strictEqual(degradedContinuation.exitCode, 0, 'degraded continuation scenario should exit successfully');
254+
assert(degradedContinuation.output.includes('EXECUTE_COUNT:0'), 'degraded continuation should not execute emitted actions');
255+
assert(/Continuity is currently degraded/i.test(degradedContinuation.output), 'degraded continuation should explain recovery-oriented continuity blocking');
256+
257+
const contradictedContinuation = await runScenarioWithContinuity(['continue'], {
258+
activeGoal: 'Add a TradingView indicator and verify it on chart',
259+
currentSubgoal: 'Verify the indicator is present',
260+
continuationReady: false,
261+
degradedReason: 'The latest evidence contradicts the claimed result.',
262+
lastTurn: {
263+
verificationStatus: 'contradicted',
264+
captureMode: 'window-copyfromscreen',
265+
captureTrusted: true,
266+
nextRecommendedStep: 'Retry indicator search before claiming success.'
267+
}
268+
});
269+
assert.strictEqual(contradictedContinuation.exitCode, 0, 'contradicted continuation scenario should exit successfully');
270+
assert(contradictedContinuation.output.includes('EXECUTE_COUNT:0'), 'contradicted continuation should not execute emitted actions');
271+
assert(/contradicted by the latest evidence/i.test(contradictedContinuation.output), 'contradicted continuation should explain why blind continuation is blocked');
272+
126273
const acknowledgement = await runScenario(['thanks']);
127274
assert.strictEqual(acknowledgement.exitCode, 0, 'acknowledgement-style scenario should exit successfully');
128275
assert(acknowledgement.output.includes('EXECUTE_COUNT:0'), 'acknowledgement-style scenario should not execute emitted actions');

0 commit comments

Comments
 (0)