source-security-dev · pureliture · Jun 19, 2026 · Jun 19, 2026 · Jun 19, 2026
diff --git a/eval/verifier-corpus/README.md b/eval/verifier-corpus/README.md
@@ -26,3 +26,61 @@ uv run security-scanner evaluate \
 
 The `private/` output path is gitignored. Do not commit generated verifier
 artifacts.
+
+## Harness corpus (`harness/`) — infra-free accuracy measurement
+
+The 2-case checkout above needs a live Ollama model. The `harness/` subtree is a
+**finding-level** corpus (N≥20, all six path-roles × TP/FP) that measures verifier
+verdict quality **without** a model or network, so prompt/anchor/threshold changes
+get reproducible before/after numbers in plain `uv run pytest`.
+
+Single source of truth: `src/security_scanner/core/evaluation/verifier_corpus.py`
+(`CORPUS_CASES`). It deterministically generates three committed artifacts:
+
+- `harness/candidates.jsonl` — the scanner "before" set.
+- `harness/expected.json` — ground truth (`load_evaluation_corpus` schema).
+- `harness/recorded-ideal.json` — perfect-model recorded baseline (drop-in slot).
+
+Regenerate after editing `CORPUS_CASES` (a freshness test fails if the committed artifacts are stale):
+
+```bash
+uv run python -m security_scanner.core.evaluation.verifier_corpus
+```
+
+The harness itself (`src/security_scanner/core/evaluation/verifier_harness.py`)
+exposes `HeuristicVerifierStrategy` (path-role anchor, deterministic) and
+`RecordedVerifierStrategy` (replays recorded model JSON), both feeding the
+existing `evaluate_verifier_delta` metrics. Gates live in
+`tests/test_verifier_harness.py`, `tests/test_path_role_anchors.py`, and
+`tests/test_verifier_confidence_sweep.py`.
+
+## Live model run (box-return, two commands)
+
+Real end-to-end accuracy needs the Tailscale Ubuntu box (Ollama + a loaded model).
+No new code is required — run the two commands below (`verify`, then `evaluate`).
+The output replaces `harness/recorded-ideal.json` (or stays in `private/`) and refreshes the recorded baseline:
+
+```bash
+SECURITY_SCANNER_OLLAMA_HOST=http://<box>:11434 \
+SECURITY_SCANNER_OLLAMA_MODEL=<model> \
+uv run security-scanner verify \
+  --findings eval/verifier-corpus/harness/candidates.jsonl \
+  --output private/harness-verified.jsonl
+
+uv run security-scanner evaluate \
+  --expected eval/verifier-corpus/harness/expected.json \
+  --findings eval/verifier-corpus/harness/candidates.jsonl \
+  --after-findings private/harness-verified.jsonl \
+  --precision-min 0.5
+```
+
+Path-role anchors are tunable per organisation via the verifier YAML, e.g.:
+
+```yaml
+verification:
+  path_role_anchors:
+    documentation: { label: true_positive, confidence: 0.9, reason: org policy }
+```
+
+Unspecified roles keep their defaults (partial merge); the default-off behaviour
+is byte-identical to before this corpus existed.
diff --git a/eval/verifier-corpus/harness/candidates.jsonl b/eval/verifier-corpus/harness/candidates.jsonl
diff --git a/eval/verifier-corpus/harness/expected.json b/eval/verifier-corpus/harness/expected.json
@@ -0,0 +1,155 @@
+{
+  "description": "Public-safe finding-level verifier corpus: 6 path-roles x TP/FP for infra-free before/after accuracy measurement.",
+  "expectedFindings": [
+    {
+      "filePath": "config/positive.env",
+      "lineStart": 4,
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "config/database.env",
+      "lineStart": 3,
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "config/settings.toml",
+      "lineStart": 10,
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-api-key"
+    },
+    {
+      "filePath": "settings/prod.yaml",
+      "lineStart": 5,
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "src/app/secrets.py",
+      "lineStart": 42,
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "internal/auth.go",
+      "lineStart": 18,
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-api-key"
+    },
+    {
+      "filePath": "lib/client.rb",
+      "lineStart": 7,
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "services/payment.ts",
+      "lineStart": 25,
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    }
+  ],
+  "knownNegatives": [
+    {
+      "filePath": "docs/example.md",
+      "lineStart": 5,
+      "reason": "Documentation shows a synthetic token-shaped example.",
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "docs/setup.md",
+      "lineStart": 12,
+      "reason": "Setup guide illustrates a fake token.",
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "README.rst",
+      "lineStart": 4,
+      "reason": "README sample value, not a real credential.",
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "docs/api/reference.txt",
+      "lineStart": 9,
+      "reason": "API reference text fixture.",
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-api-key"
+    },
+    {
+      "filePath": "examples/quickstart.py",
+      "lineStart": 6,
+      "reason": "Quickstart example placeholder.",
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "samples/config.env",
+      "lineStart": 3,
+      "reason": "Sample config placeholder.",
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "test/fixtures/creds.json",
+      "lineStart": 2,
+      "reason": "Test fixture credentials (synthetic).",
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-api-key"
+    },
+    {
+      "filePath": "tests/test_login.py",
+      "lineStart": 30,
+      "reason": "Test asserts against a fake token.",
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "config/legacy.env",
+      "lineStart": 8,
+      "reason": "Legacy placeholder token left in config; not live.",
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "config/template.toml",
+      "lineStart": 6,
+      "reason": "Template token placeholder.",
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-api-key"
+    },
+    {
+      "filePath": "src/utils/format.py",
+      "lineStart": 15,
+      "reason": "Public sample constant shaped like a token.",
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "internal/consts.go",
+      "lineStart": 3,
+      "reason": "Documented constant, not a credential.",
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "data/blob.bin",
+      "lineStart": 1,
+      "reason": "Opaque blob, role not specific.",
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "Makefile",
+      "lineStart": 9,
+      "reason": "Build file reference, role not specific.",
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    }
+  ],
+  "name": "synthetic-verifier-harness-v1",
+  "schemaVersion": 1
+}
diff --git a/eval/verifier-corpus/harness/recorded-ideal.json b/eval/verifier-corpus/harness/recorded-ideal.json
@@ -0,0 +1,115 @@
+{
+  "name": "ideal-perfect-model-v1",
+  "responses": {
+    "finding_02cc46475bfdd191": {
+      "confidence": 0.95,
+      "label": "false_positive",
+      "reason": "API reference text fixture."
+    },
+    "finding_223611539975a65b": {
+      "confidence": 0.95,
+      "label": "false_positive",
+      "reason": "Opaque blob, role not specific."
+    },
+    "finding_24e40c8b470bd6a3": {
+      "confidence": 0.95,
+      "label": "true_positive",
+      "reason": "Synthetic TP case."
+    },
+    "finding_2ee79410cd93d57c": {
+      "confidence": 0.95,
+      "label": "true_positive",
+      "reason": "Synthetic TP case."
+    },
+    "finding_34de71c589210c56": {
+      "confidence": 0.95,
+      "label": "false_positive",
+      "reason": "Build file reference, role not specific."
+    },
+    "finding_3fae230b287cbfb8": {
+      "confidence": 0.95,
+      "label": "false_positive",
+      "reason": "Documentation shows a synthetic token-shaped example."
+    },
+    "finding_40ee8887f222fbd0": {
+      "confidence": 0.95,
+      "label": "false_positive",
+      "reason": "Public sample constant shaped like a token."
+    },
+    "finding_5edd52c83c9d4c88": {
+      "confidence": 0.95,
+      "label": "false_positive",
+      "reason": "Template token placeholder."
+    },
+    "finding_5fa7b574d009b3ed": {
+      "confidence": 0.95,
+      "label": "false_positive",
+      "reason": "Sample config placeholder."
+    },
+    "finding_629466f031c50d61": {
+      "confidence": 0.95,
+      "label": "false_positive",
+      "reason": "README sample value, not a real credential."
+    },
+    "finding_9a09f843316d378e": {
+      "confidence": 0.95,
+      "label": "false_positive",
+      "reason": "Legacy placeholder token left in config; not live."
+    },
+    "finding_a46388d7dd53fd56": {
+      "confidence": 0.95,
+      "label": "false_positive",
+      "reason": "Quickstart example placeholder."
+    },
+    "finding_a678f78485e50ab0": {
+      "confidence": 0.95,
+      "label": "true_positive",
+      "reason": "Synthetic TP case."
+    },
+    "finding_aa1515e56f8c78a2": {
+      "confidence": 0.95,
+      "label": "true_positive",
+      "reason": "Synthetic TP case."
+    },
+    "finding_aae8718fae0433dd": {
+      "confidence": 0.95,
+      "label": "true_positive",
+      "reason": "Synthetic TP case."
+    },
+    "finding_cdb1a4dec3338070": {
+      "confidence": 0.95,
+      "label": "false_positive",
+      "reason": "Test asserts against a fake token."
+    },
+    "finding_e634b9be1f9231d8": {
+      "confidence": 0.95,
+      "label": "false_positive",
+      "reason": "Documented constant, not a credential."
+    },
+    "finding_ea73afe698f3d962": {
+      "confidence": 0.95,
+      "label": "false_positive",
+      "reason": "Test fixture credentials (synthetic)."
+    },
+    "finding_eee7d32f875d1e7e": {
+      "confidence": 0.95,
+      "label": "true_positive",
+      "reason": "Synthetic TP case."
+    },
+    "finding_f5629a9839992c40": {
+      "confidence": 0.95,
+      "label": "false_positive",
+      "reason": "Setup guide illustrates a fake token."
+    },
+    "finding_ff80007ea24ebd40": {
+      "confidence": 0.95,
+      "label": "true_positive",
+      "reason": "Synthetic TP case."
+    },
+    "finding_ffd80950f49a3db2": {
+      "confidence": 0.95,
+      "label": "true_positive",
+      "reason": "Synthetic TP case."
+    }
+  }
+}
diff --git a/src/security_scanner/core/evaluation/__init__.py b/src/security_scanner/core/evaluation/__init__.py
@@ -19,6 +19,18 @@
     render_evaluation_report,
     render_verifier_delta_report,
 )
+from security_scanner.core.evaluation.verifier_corpus import (
+    CORPUS_CASES,
+    build_corpus_candidates,
+    build_evaluation_corpus,
+    build_ideal_responses,
+)
+from security_scanner.core.evaluation.verifier_harness import (
+    HeuristicVerifierStrategy,
+    RecordedVerifierStrategy,
+    run_corpus_delta,
+    verify_candidates,
+)
 
 __all__ = [
     "EvaluationCorpus",
@@ -34,4 +46,12 @@
     "load_evaluation_corpus",
     "render_evaluation_report",
     "render_verifier_delta_report",
+    "CORPUS_CASES",
+    "build_corpus_candidates",
+    "build_evaluation_corpus",
+    "build_ideal_responses",
+    "HeuristicVerifierStrategy",
+    "RecordedVerifierStrategy",
+    "run_corpus_delta",
+    "verify_candidates",
 ]