TayDa64
diff --git a/‎TESTING.md‎
Lines changed: 36 additions & 0 deletions b/‎TESTING.md‎
Lines changed: 36 additions & 0 deletions
diff --git a/‎docs/CHAT_CONTINUITY_IMPLEMENTATION_PLAN.md‎
Lines changed: 48 additions & 0 deletions b/‎docs/CHAT_CONTINUITY_IMPLEMENTATION_PLAN.md‎
Lines changed: 48 additions & 0 deletions
diff --git a/‎docs/RUNTIME_REGRESSION_WORKFLOW.md‎
Lines changed: 145 additions & 0 deletions b/‎docs/RUNTIME_REGRESSION_WORKFLOW.md‎
Lines changed: 145 additions & 0 deletions
diff --git a/‎package.json‎
Lines changed: 2 additions & 0 deletions b/‎package.json‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎scripts/extract-transcript-regression.js‎
Lines changed: 92 additions & 0 deletions b/‎scripts/extract-transcript-regression.js‎
Lines changed: 92 additions & 0 deletions
@@ -227,6 +227,42 @@ What this covers:
 - cohort filtering to separate pre-fix history from post-fix Phase 3 runs
 - evaluator characterization for transcript expectations without needing a live model run
 
+### Runtime Transcript Regression Pipeline
+
+Use the transcript regression pipeline when you already have a sanitized `liku chat` transcript or an inline-proof `.log` artifact and want to promote it into a checked-in regression fixture quickly:
+
+```bash
+# List checked-in transcript fixtures
+npm run regression:transcripts -- --list
+
+# Run all checked-in transcript fixtures
+npm run regression:transcripts
+
+# Run one fixture only
+npm run regression:transcripts -- --fixture repo-boundary-clarification-runtime
+
+# Generate a fixture skeleton from an existing transcript log
+npm run regression:extract -- --transcript-file "C:\path\to\runtime.log" --fixture-name repo-boundary-clarification
+
+# Or print a fixture skeleton without writing a file
+npm run regression:extract -- --transcript-file "C:\path\to\runtime.log" --stdout-only
+```
+
+What this covers:
+
+- checked-in sanitized transcript fixtures under `scripts/fixtures/transcripts/`
+- deterministic evaluation of transcript expectations without a live model call
+- rapid conversion of a real runtime failure into a reusable fixture skeleton
+- reuse of the same transcript parsing/evaluation semantics already used by the inline-proof harness
+
+Recommended workflow:
+
+1. capture or identify the runtime transcript/log you want to preserve
+2. sanitize it down to the smallest transcript snippet that still proves the failure or behavior
+3. run `regression:extract` to generate a fixture skeleton
+4. tighten the generated expectations manually so they assert the real invariant, not incidental phrasing
+5. run `regression:transcripts` and the nearest behavior test before committing
+
 ### Manual Checks for Model Selection
 
 When changing model-selection UX or Copilot routing, add these checks:
 
@@ -2327,6 +2327,38 @@ The most credible next roadmap is:
 
 ### Roadmap N4 — Capability-policy matrix by app and surface class
 
+**Status (2026-03-30)**
+- first runtime matrix slice implemented
+- landed via:
+  - `src/main/capability-policy.js`
+  - `src/main/ai-service/message-builder.js`
+  - `src/main/ai-service/policy-enforcement.js`
+  - `src/main/ai-service.js`
+  - `scripts/test-capability-policy.js`
+  - `scripts/test-ai-service-policy.js`
+- current scope:
+  - added a built-in runtime capability-policy matrix for the canonical surface classes:
+    - `browser`
+    - `uia-rich`
+    - `visual-first-low-uia`
+    - `keyboard-window-first`
+  - the runtime policy snapshot now exposes normalized support dimensions for each surface/app combination:
+    - semantic control
+    - keyboard control
+    - trustworthy background capture
+    - precise placement
+    - bounded text extraction
+    - approval-time recovery
+  - prompt assembly now emits capability-policy snapshot context instead of relying only on inline surface heuristics
+  - action-plan enforcement now applies narrow built-in matrix checks in addition to existing per-app `actionPolicies` / `negativePolicies`
+  - TradingView now rides the generic `visual-first-low-uia` matrix as a first overlay for chart-evidence honesty and precise-placement bounds
+  - TradingView overlay metadata now pulls from existing verification/shortcut helpers so the runtime policy snapshot can surface:
+    - trading mode hints (`paper` / `live` / `unknown`)
+    - stable default shortcuts
+    - customizable shortcuts
+    - paper-test-only shortcut groups
+  - existing visual trust and background-capture signals are reused as policy inputs rather than duplicated into a second evidence model
+
 **Why this should be next**
 - Several current safety and honesty wins are still encoded as targeted TradingView or low-UIA heuristics.
 - The next architectural step is to formalize those rules into a reusable capability-policy layer.
@@ -2359,6 +2391,22 @@ The most credible next roadmap is:
 
 ### Roadmap N5 — Runtime transcript to regression pipeline
 
+**Status (2026-03-30)**
+- first transcript-ingestion slice implemented
+- landed via:
+  - `scripts/transcript-regression-fixtures.js`
+  - `scripts/extract-transcript-regression.js`
+  - `scripts/run-transcript-regressions.js`
+  - `scripts/test-transcript-regression-pipeline.js`
+  - `scripts/fixtures/transcripts/inline-proof-chat-regressions.json`
+  - `docs/RUNTIME_REGRESSION_WORKFLOW.md`
+- current scope:
+  - added a checked-in transcript fixture format for sanitized `liku chat` regressions
+  - added an extraction helper that turns a runtime transcript or inline-proof log into a fixture skeleton
+  - added a fixture-driven runner that reuses the existing inline-proof transcript evaluator instead of introducing a second regression engine
+  - seeded the pipeline with checked-in transcript fixtures for repo-boundary and forgone-feature regressions
+  - documented the `runtime finding -> fixture -> focused rerun -> commit` workflow in repo docs and testing commands
+
 **Why this should be next**
 - The strongest recent improvements all came from real runtime transcripts, then hand-converted into tests.
 - That workflow works, but it is still too manual and easy to delay.
 
@@ -0,0 +1,145 @@
+# Runtime Regression Workflow
+
+## Goal
+
+Turn a real `liku chat` runtime finding into a checked-in, repeatable regression with as little friction as possible.
+
+This first N5 slice intentionally reuses the existing inline-proof transcript evaluator instead of introducing a second transcript engine. The workflow is:
+
+1. capture a runtime transcript or reuse an inline-proof `.log`
+2. sanitize it down to the smallest useful snippet
+3. generate a transcript fixture skeleton
+4. tighten the generated expectations
+5. run transcript regressions and the nearest focused behavior test
+6. commit the fixture and the behavioral fix together
+
+## Inputs supported in this slice
+
+- plaintext `liku chat` transcripts
+- inline-proof logs from `~/.liku/traces/chat-inline-proof/*.log`
+- pasted transcript text over stdin
+
+Out of scope for this first slice:
+
+- automatic replay of JSONL telemetry or agent-trace files
+- full transcript-to-test generation without manual expectation review
+- broad redaction/policy redesign for runtime capture
+
+## Fixture format
+
+Checked-in transcript fixtures live under:
+
+- `scripts/fixtures/transcripts/`
+
+The fixture bundle format is JSON with multiple named cases at the top level. Each case can include:
+
+- `description`
+- `source`
+  - `capturedAt`
+  - `tracePath` when relevant
+  - observed provider/model metadata when available
+- `transcriptLines`
+- optional derived fields such as `prompts`, `assistantTurns`, and `observedHeaders`
+- `notes`
+- `expectations`
+
+Expectation semantics intentionally mirror the inline-proof harness:
+
+- `scope: transcript` for whole-transcript checks
+- `turn` for assistant-turn-specific checks
+- `include`
+- `exclude`
+- `count`
+
+Pattern entries are stored as JSON regex specs:
+
+- `{ "regex": "Provider:\\s+copilot", "flags": "i" }`
+
+## Commands
+
+List transcript fixtures:
+
+- `npm run regression:transcripts -- --list`
+
+Run all transcript fixtures:
+
+- `npm run regression:transcripts`
+
+Run a single transcript fixture:
+
+- `npm run regression:transcripts -- --fixture repo-boundary-clarification-runtime`
+
+Generate a fixture skeleton from a transcript file:
+
+- `npm run regression:extract -- --transcript-file "C:\path\to\runtime.log" --fixture-name repo-boundary-clarification`
+
+Print a fixture skeleton without writing a file:
+
+- `npm run regression:extract -- --transcript-file "C:\path\to\runtime.log" --stdout-only`
+
+## Recommended loop
+
+### 1. Capture the failure
+
+Prefer one of these sources:
+
+- a fresh `liku chat` transcript
+- an inline-proof log already saved under `~/.liku/traces/chat-inline-proof/`
+- a small hand-curated transcript excerpt from a runtime session
+
+Keep only the lines that prove the invariant you care about. Smaller fixtures are easier to review and less brittle.
+
+### 2. Generate a fixture skeleton
+
+Run `regression:extract` against the sanitized transcript.
+
+The helper derives:
+
+- a fixture name
+- prompts
+- assistant turns
+- observed provider/model headers
+- placeholder expectations
+
+Treat those expectations as a draft, not finished truth.
+
+### 3. Tighten expectations manually
+
+Before checking in the fixture:
+
+- remove incidental wording matches
+- keep only invariants that prove the bug fix or safety behavior
+- add `exclude` or `count` checks when they make the regression sharper
+
+Good transcript fixtures assert the behavior that matters, not every line in the transcript.
+
+### 4. Run the transcript regression and the nearest focused behavior test
+
+Minimum validation:
+
+- `npm run regression:transcripts`
+- `node scripts/test-transcript-regression-pipeline.js`
+
+Then run the nearest behavioral regression for the feature you touched, for example:
+
+- `node scripts/test-windows-observation-flow.js`
+- `node scripts/test-chat-actionability.js`
+- `node scripts/test-bug-fixes.js`
+
+### 5. Commit the fixture with the fix
+
+The preferred N5 habit is:
+
+- runtime finding
+- transcript fixture
+- focused code/test fix
+- commit
+
+That keeps new hardening work grounded in observed runtime behavior instead of reconstructed memory.
+
+## Practical guidelines
+
+1. Prefer sanitized transcript snippets over full raw dumps.
+2. Use one fixture bundle with several named cases when the domain is closely related.
+3. Keep transcript fixtures deterministic and stable enough to survive harmless wording drift.
+4. If a transcript fixture starts growing broad, add or retain a narrower behavior test alongside it.
@@ -16,6 +16,8 @@
     "test:skills:inline": "node scripts/test-skill-inline-smoothness.js",
     "proof:inline": "node scripts/run-chat-inline-proof.js",
     "proof:inline:summary": "node scripts/summarize-chat-inline-proof.js",
+    "regression:extract": "node scripts/extract-transcript-regression.js",
+    "regression:transcripts": "node scripts/run-transcript-regressions.js",
     "smoke:shortcuts": "node scripts/smoke-shortcuts.js",
     "smoke:chat-direct": "node scripts/smoke-chat-direct.js",
     "smoke": "node scripts/smoke-command-system.js",
 
@@ -0,0 +1,92 @@
+#!/usr/bin/env node
+
+const fs = require('fs');
+const path = require('path');
+const {
+  DEFAULT_FIXTURE_DIR,
+  buildFixtureSkeleton,
+  sanitizeFixtureName,
+  upsertFixtureBundleEntry
+} = require(path.join(__dirname, 'transcript-regression-fixtures.js'));
+
+function getArgValue(flagName) { // Returns the process.argv token that directly follows the first occurrence of flagName, or null.
+  const index = process.argv.indexOf(flagName); // -1 when the flag is not present
+  if (index >= 0 && index + 1 < process.argv.length) { // flag found and it is not the final token
+    return process.argv[index + 1]; // NOTE(review): returned as-is, even when the next token is itself another flag
+  }
+  return null; // flag absent, or nothing follows it
+}
+
+function hasFlag(flagName) { // Returns true when flagName appears in process.argv as an exact token, false otherwise.
+  return process.argv.includes(flagName);
+}
+
+function readTranscriptInput() { // Loads transcript text from --transcript-file when given, otherwise from non-TTY stdin; returns { transcript, sourceTracePath } or throws.
+  const transcriptFile = getArgValue('--transcript-file');
+  if (transcriptFile) { // an explicit file argument takes precedence over stdin
+    return {
+      transcript: fs.readFileSync(transcriptFile, 'utf8'), // synchronous read, decoded as UTF-8 text
+      sourceTracePath: transcriptFile // the path exactly as passed on the command line
+    };
+  }
+
+  if (!process.stdin.isTTY) { // stdin is not an interactive terminal, so text was piped or redirected in
+    return {
+      transcript: fs.readFileSync(0, 'utf8'), // file descriptor 0 is stdin
+      sourceTracePath: null // stdin input has no file path to record
+    };
+  }
+
+  throw new Error('Provide --transcript-file <path> or pipe transcript text via stdin.'); // neither input source was available
+}
+
+function resolveOutputFile(fixtureName) { // Returns the fixture file path: the --output-file value when given, else a .json file under DEFAULT_FIXTURE_DIR.
+  const explicit = getArgValue('--output-file');
+  if (explicit) return explicit; // caller-supplied path is used verbatim
+  return path.join(DEFAULT_FIXTURE_DIR, `${sanitizeFixtureName(fixtureName || 'runtime-transcript')}.json`); // a falsy fixtureName falls back to 'runtime-transcript' before sanitizing
+}
+
+function main() { // CLI flow: read a transcript, build a fixture skeleton, save it unless --stdout-only is set, then print a summary and the fixture JSON.
+  const { transcript, sourceTracePath } = readTranscriptInput();
+  const description = getArgValue('--description') || null; // optional; null when omitted
+  const capturedAt = getArgValue('--captured-at') || null; // optional; null when omitted
+  const requestedName = getArgValue('--fixture-name') || null; // optional; null when omitted
+  const skeleton = buildFixtureSkeleton({ // the result is read below through .fixtureName and .entry
+    fixtureName: requestedName,
+    description,
+    transcript,
+    sourceTracePath: getArgValue('--source-trace-path') || sourceTracePath, // an explicit flag overrides the path of the file that was read
+    capturedAt
+  });
+
+  const outputFile = resolveOutputFile(skeleton.fixtureName); // computed even when --stdout-only suppresses the write
+  const shouldWrite = !hasFlag('--stdout-only');
+
+  if (shouldWrite) {
+    const stored = upsertFixtureBundleEntry(outputFile, skeleton.fixtureName, skeleton.entry, {
+      overwrite: hasFlag('--overwrite') // NOTE(review): presumably permits replacing an existing entry with the same name; confirm in transcript-regression-fixtures.js
+    });
+    console.log(`Saved transcript regression fixture: ${stored.filePath}`);
+  }
+
+  console.log(`Fixture: ${skeleton.fixtureName}`); // the summary lines and JSON below print in both write mode and --stdout-only mode
+  console.log(`Prompts: ${skeleton.entry.prompts.length}`);
+  console.log(`Assistant turns: ${skeleton.entry.assistantTurns.length}`);
+  console.log(`Observed providers: ${(skeleton.entry.observedHeaders.providers || []).join(', ') || 'none'}`); // prints 'none' when providers is missing or empty
+  console.log('');
+  console.log(JSON.stringify({ [skeleton.fixtureName]: skeleton.entry }, null, 2)); // single-entry object keyed by fixture name, indented by 2 spaces
+}
+
+if (require.main === module) { // run the CLI only when this file is executed directly, not when it is required
+  try {
+    main();
+  } catch (error) {
+    console.error(error.stack || error.message); // prefer the stack trace; fall back to the bare message
+    process.exit(1); // exit with a non-zero code on any failure
+  }
+}
+
+module.exports = { // exported for code that requires this file; main() and the argv helpers are not exported
+  readTranscriptInput,
+  resolveOutputFile
+};