From a7d673644ae385b9ef7e14b405d35e2cba59ad93 Mon Sep 17 00:00:00 2001
From: pureliture <tkdgur1756@naver.com>
Date: Sat, 20 Jun 2026 00:42:32 +0900
Subject: [PATCH 1/2] feat(verifier): infra-free verdict-quality measurement
 substrate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

검증 알고리즘 고도화: live Ollama 없이 verdict 품질을 측정/회귀방지하는 결정론적 substrate.

- core/evaluation/verifier_harness.py: Heuristic/Recorded 전략 + run_corpus_delta (metrics.py 재사용)
- core/evaluation/verifier_corpus.py: 단일 SoT CORPUS_CASES(22건, 6 path-role) + 생성기
- eval/verifier-corpus/harness/: candidates.jsonl/expected.json/recorded-ideal.json (생성물, freshness 테스트로 stale 차단)
- prompt.py: path-role 앵커 추출(DEFAULT_PATH_ROLE_ANCHORS) + 설정 가능 path_role_decision
- verifier.py/client.py/verify_artifact.py: yaml verification.path_role_anchors 배선(default byte-identical, I3 보존)
- tests: 정확도 게이트(FR3)/path-role 파라메트라이즈(FR4)/min_confidence sweep(FR6)

heuristic baseline: before precision 0.3636 -> after 0.5714, FP_reduction 8, recall 1.0 보존.
min_confidence 0.60 유지(플래토 <=0.80). 전체 uv run pytest 746 green. I1~I6 불변식 보존.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 eval/verifier-corpus/README.md                |  58 +++
 eval/verifier-corpus/harness/candidates.jsonl |  22 ++
 eval/verifier-corpus/harness/expected.json    | 155 ++++++++
 .../harness/recorded-ideal.json               | 115 ++++++
 .../core/evaluation/__init__.py               |  20 +
 .../core/evaluation/verifier_corpus.py        | 352 ++++++++++++++++++
 .../core/evaluation/verifier_harness.py       | 106 ++++++
 src/security_scanner/llm/common/prompt.py     |  92 ++++-
 src/security_scanner/llm/common/verifier.py   |  13 +-
 src/security_scanner/llm/ollama/client.py     |  11 +-
 .../runtime/verify_artifact.py                |  55 ++-
 tests/test_path_role_anchors.py               | 179 +++++++++
 tests/test_verifier_confidence_sweep.py       |  62 +++
 tests/test_verifier_harness.py                | 145 ++++++++
 14 files changed, 1362 insertions(+), 23 deletions(-)
 create mode 100644 eval/verifier-corpus/harness/candidates.jsonl
 create mode 100644 eval/verifier-corpus/harness/expected.json
 create mode 100644 eval/verifier-corpus/harness/recorded-ideal.json
 create mode 100644 src/security_scanner/core/evaluation/verifier_corpus.py
 create mode 100644 src/security_scanner/core/evaluation/verifier_harness.py
 create mode 100644 tests/test_path_role_anchors.py
 create mode 100644 tests/test_verifier_confidence_sweep.py
 create mode 100644 tests/test_verifier_harness.py

diff --git a/eval/verifier-corpus/README.md b/eval/verifier-corpus/README.md
index 5a7a836..e0c2f41 100644
--- a/eval/verifier-corpus/README.md
+++ b/eval/verifier-corpus/README.md
@@ -26,3 +26,61 @@ uv run security-scanner evaluate \
 
 The `private/` output path is gitignored. Do not commit generated verifier
 artifacts.
+
+## Harness corpus (`harness/`) — infra-free accuracy measurement
+
+The 2-case checkout above needs a live Ollama model. The `harness/` subtree is a
+**finding-level** corpus (N≥20, all six path-roles × TP/FP) that measures verifier
+verdict quality **without** a model or network, so prompt/anchor/threshold changes
+get reproducible before/after numbers in plain `uv run pytest`.
+
+Single source of truth: `src/security_scanner/core/evaluation/verifier_corpus.py`
+(`CORPUS_CASES`). It deterministically generates three committed artifacts:
+
+- `harness/candidates.jsonl` — the scanner "before" set.
+- `harness/expected.json` — ground truth (`load_evaluation_corpus` schema).
+- `harness/recorded-ideal.json` — perfect-model recorded baseline (drop-in slot).
+
+Regenerate after editing `CORPUS_CASES` (a freshness test fails if stale):
+
+```bash
+uv run python -m security_scanner.core.evaluation.verifier_corpus
+```
+
+The harness itself (`core/evaluation/verifier_harness.py`) exposes
+`HeuristicVerifierStrategy` (path-role anchor, deterministic) and
+`RecordedVerifierStrategy` (replays recorded model JSON), both feeding the
+existing `evaluate_verifier_delta` metrics. The accuracy gates live in
+`tests/test_verifier_harness.py`, `tests/test_path_role_anchors.py`, and
+`tests/test_verifier_confidence_sweep.py`.
+
+## Live model run (box-return, single command)
+
+Real end-to-end accuracy needs the Tailscale Ubuntu box (Ollama + a loaded model).
+No new code is required — only the `verify` command below (`evaluate` then scores it).
+The `verify` output replaces `harness/recorded-ideal.json` (or `private/`) and refreshes the recorded baseline:
+
+```bash
+SECURITY_SCANNER_OLLAMA_HOST=http://<box>:11434 \
+SECURITY_SCANNER_OLLAMA_MODEL=<model> \
+uv run security-scanner verify \
+  --findings eval/verifier-corpus/harness/candidates.jsonl \
+  --output private/harness-verified.jsonl
+
+uv run security-scanner evaluate \
+  --expected eval/verifier-corpus/harness/expected.json \
+  --findings eval/verifier-corpus/harness/candidates.jsonl \
+  --after-findings private/harness-verified.jsonl \
+  --precision-min 0.5
+```
+
+Path-role anchors are tunable per organisation via the verifier YAML, e.g.:
+
+```yaml
+verification:
+  path_role_anchors:
+    documentation: { label: true_positive, confidence: 0.9, reason: org policy }
+```
+
+Unspecified roles keep their defaults (partial merge); when `path_role_anchors` is
+omitted, behaviour is byte-identical to before this corpus existed.
diff --git a/eval/verifier-corpus/harness/candidates.jsonl b/eval/verifier-corpus/harness/candidates.jsonl
new file mode 100644
index 0000000..74cce63
--- /dev/null
+++ b/eval/verifier-corpus/harness/candidates.jsonl
@@ -0,0 +1,22 @@
+{"category": "SECRET", "confidence": "MEDIUM", "evidence": {"contextArtifactRef": null, "redacted": true, "secretHash": "salted-sha256:4790348bcad6a6223dca2af688c82c3d8c2a430a94734b9cc2c1078e688b4822"}, "findingId": "finding_aae8718fae0433dd", "fingerprint": "[\"synthetic-org/verifier-harness\",\"config/positive.env\",4,\"synthetic-fake-token\"]", "gitleaks": null, "location": {"filePath": "config/positive.env", "lineEnd": null, "lineStart": 4}, "repo": {"branch": null, "commit": null, "fullName": "synthetic-org/verifier-harness"}, "ruleId": "synthetic-fake-token", "scan": {"rulePackVersion": "secret-rules-0.1.0", "scanRunId": "scan_harness"}, "severity": "HIGH", "sourceTool": "gitleaks", "sourceToolVersion": null, "status": "OPEN", "triage": {"reason": null, "verdict": "NEEDS_REVIEW", "verifier": null}}
+{"category": "SECRET", "confidence": "MEDIUM", "evidence": {"contextArtifactRef": null, "redacted": true, "secretHash": "salted-sha256:36697503eb7b8e666cbf9826cedc2968069ddca3ddf0cb7f24f685af4dd999a7"}, "findingId": "finding_24e40c8b470bd6a3", "fingerprint": "[\"synthetic-org/verifier-harness\",\"config/database.env\",3,\"synthetic-fake-token\"]", "gitleaks": null, "location": {"filePath": "config/database.env", "lineEnd": null, "lineStart": 3}, "repo": {"branch": null, "commit": null, "fullName": "synthetic-org/verifier-harness"}, "ruleId": "synthetic-fake-token", "scan": {"rulePackVersion": "secret-rules-0.1.0", "scanRunId": "scan_harness"}, "severity": "HIGH", "sourceTool": "gitleaks", "sourceToolVersion": null, "status": "OPEN", "triage": {"reason": null, "verdict": "NEEDS_REVIEW", "verifier": null}}
+{"category": "SECRET", "confidence": "MEDIUM", "evidence": {"contextArtifactRef": null, "redacted": true, "secretHash": "salted-sha256:563816a7ca1664428aea4b64630d829f1147ce6f888d123e69c1e412e75dec9e"}, "findingId": "finding_2ee79410cd93d57c", "fingerprint": "[\"synthetic-org/verifier-harness\",\"config/settings.toml\",10,\"synthetic-api-key\"]", "gitleaks": null, "location": {"filePath": "config/settings.toml", "lineEnd": null, "lineStart": 10}, "repo": {"branch": null, "commit": null, "fullName": "synthetic-org/verifier-harness"}, "ruleId": "synthetic-api-key", "scan": {"rulePackVersion": "secret-rules-0.1.0", "scanRunId": "scan_harness"}, "severity": "HIGH", "sourceTool": "gitleaks", "sourceToolVersion": null, "status": "OPEN", "triage": {"reason": null, "verdict": "NEEDS_REVIEW", "verifier": null}}
+{"category": "SECRET", "confidence": "MEDIUM", "evidence": {"contextArtifactRef": null, "redacted": true, "secretHash": "salted-sha256:05a58bbd6d879652cdffa48cfdeff1b3ede451bd2dda187ed82d67b76ebb214a"}, "findingId": "finding_a678f78485e50ab0", "fingerprint": "[\"synthetic-org/verifier-harness\",\"settings/prod.yaml\",5,\"synthetic-fake-token\"]", "gitleaks": null, "location": {"filePath": "settings/prod.yaml", "lineEnd": null, "lineStart": 5}, "repo": {"branch": null, "commit": null, "fullName": "synthetic-org/verifier-harness"}, "ruleId": "synthetic-fake-token", "scan": {"rulePackVersion": "secret-rules-0.1.0", "scanRunId": "scan_harness"}, "severity": "HIGH", "sourceTool": "gitleaks", "sourceToolVersion": null, "status": "OPEN", "triage": {"reason": null, "verdict": "NEEDS_REVIEW", "verifier": null}}
+{"category": "SECRET", "confidence": "MEDIUM", "evidence": {"contextArtifactRef": null, "redacted": true, "secretHash": "salted-sha256:200378dd0b07270c269d93eb6a6d4ce082b876a9a617d14c0d6bbbf5c8784009"}, "findingId": "finding_ff80007ea24ebd40", "fingerprint": "[\"synthetic-org/verifier-harness\",\"src/app/secrets.py\",42,\"synthetic-fake-token\"]", "gitleaks": null, "location": {"filePath": "src/app/secrets.py", "lineEnd": null, "lineStart": 42}, "repo": {"branch": null, "commit": null, "fullName": "synthetic-org/verifier-harness"}, "ruleId": "synthetic-fake-token", "scan": {"rulePackVersion": "secret-rules-0.1.0", "scanRunId": "scan_harness"}, "severity": "HIGH", "sourceTool": "gitleaks", "sourceToolVersion": null, "status": "OPEN", "triage": {"reason": null, "verdict": "NEEDS_REVIEW", "verifier": null}}
+{"category": "SECRET", "confidence": "MEDIUM", "evidence": {"contextArtifactRef": null, "redacted": true, "secretHash": "salted-sha256:0428bb3795c96342d2e5094f8a685c7db4498570dd82f28fa6642414749ae902"}, "findingId": "finding_ffd80950f49a3db2", "fingerprint": "[\"synthetic-org/verifier-harness\",\"internal/auth.go\",18,\"synthetic-api-key\"]", "gitleaks": null, "location": {"filePath": "internal/auth.go", "lineEnd": null, "lineStart": 18}, "repo": {"branch": null, "commit": null, "fullName": "synthetic-org/verifier-harness"}, "ruleId": "synthetic-api-key", "scan": {"rulePackVersion": "secret-rules-0.1.0", "scanRunId": "scan_harness"}, "severity": "HIGH", "sourceTool": "gitleaks", "sourceToolVersion": null, "status": "OPEN", "triage": {"reason": null, "verdict": "NEEDS_REVIEW", "verifier": null}}
+{"category": "SECRET", "confidence": "MEDIUM", "evidence": {"contextArtifactRef": null, "redacted": true, "secretHash": "salted-sha256:0497dd5b129740eea129100c4b10d9a474df27715c5f58f0d6c1fa58782efd34"}, "findingId": "finding_eee7d32f875d1e7e", "fingerprint": "[\"synthetic-org/verifier-harness\",\"lib/client.rb\",7,\"synthetic-fake-token\"]", "gitleaks": null, "location": {"filePath": "lib/client.rb", "lineEnd": null, "lineStart": 7}, "repo": {"branch": null, "commit": null, "fullName": "synthetic-org/verifier-harness"}, "ruleId": "synthetic-fake-token", "scan": {"rulePackVersion": "secret-rules-0.1.0", "scanRunId": "scan_harness"}, "severity": "HIGH", "sourceTool": "gitleaks", "sourceToolVersion": null, "status": "OPEN", "triage": {"reason": null, "verdict": "NEEDS_REVIEW", "verifier": null}}
+{"category": "SECRET", "confidence": "MEDIUM", "evidence": {"contextArtifactRef": null, "redacted": true, "secretHash": "salted-sha256:5188f495c0e5f4127adfb06b8c1874cdc79c825fa359ba59592183fd35028a39"}, "findingId": "finding_aa1515e56f8c78a2", "fingerprint": "[\"synthetic-org/verifier-harness\",\"services/payment.ts\",25,\"synthetic-fake-token\"]", "gitleaks": null, "location": {"filePath": "services/payment.ts", "lineEnd": null, "lineStart": 25}, "repo": {"branch": null, "commit": null, "fullName": "synthetic-org/verifier-harness"}, "ruleId": "synthetic-fake-token", "scan": {"rulePackVersion": "secret-rules-0.1.0", "scanRunId": "scan_harness"}, "severity": "HIGH", "sourceTool": "gitleaks", "sourceToolVersion": null, "status": "OPEN", "triage": {"reason": null, "verdict": "NEEDS_REVIEW", "verifier": null}}
+{"category": "SECRET", "confidence": "MEDIUM", "evidence": {"contextArtifactRef": null, "redacted": true, "secretHash": "salted-sha256:b201e8ee66351cf1d2865230fdb3d92e697b095da0f5f82f8da70ddfe2fc6c4c"}, "findingId": "finding_3fae230b287cbfb8", "fingerprint": "[\"synthetic-org/verifier-harness\",\"docs/example.md\",5,\"synthetic-fake-token\"]", "gitleaks": null, "location": {"filePath": "docs/example.md", "lineEnd": null, "lineStart": 5}, "repo": {"branch": null, "commit": null, "fullName": "synthetic-org/verifier-harness"}, "ruleId": "synthetic-fake-token", "scan": {"rulePackVersion": "secret-rules-0.1.0", "scanRunId": "scan_harness"}, "severity": "HIGH", "sourceTool": "gitleaks", "sourceToolVersion": null, "status": "OPEN", "triage": {"reason": null, "verdict": "NEEDS_REVIEW", "verifier": null}}
+{"category": "SECRET", "confidence": "MEDIUM", "evidence": {"contextArtifactRef": null, "redacted": true, "secretHash": "salted-sha256:8632a394f8f7b33c2eaf08dafffbd2e2e643deb3a3110ab29e9a9ac4e6d5455d"}, "findingId": "finding_f5629a9839992c40", "fingerprint": "[\"synthetic-org/verifier-harness\",\"docs/setup.md\",12,\"synthetic-fake-token\"]", "gitleaks": null, "location": {"filePath": "docs/setup.md", "lineEnd": null, "lineStart": 12}, "repo": {"branch": null, "commit": null, "fullName": "synthetic-org/verifier-harness"}, "ruleId": "synthetic-fake-token", "scan": {"rulePackVersion": "secret-rules-0.1.0", "scanRunId": "scan_harness"}, "severity": "HIGH", "sourceTool": "gitleaks", "sourceToolVersion": null, "status": "OPEN", "triage": {"reason": null, "verdict": "NEEDS_REVIEW", "verifier": null}}
+{"category": "SECRET", "confidence": "MEDIUM", "evidence": {"contextArtifactRef": null, "redacted": true, "secretHash": "salted-sha256:e20883ba227c9148bc1192464d81655f905ec019a68edd60beba403158fdda60"}, "findingId": "finding_629466f031c50d61", "fingerprint": "[\"synthetic-org/verifier-harness\",\"README.rst\",4,\"synthetic-fake-token\"]", "gitleaks": null, "location": {"filePath": "README.rst", "lineEnd": null, "lineStart": 4}, "repo": {"branch": null, "commit": null, "fullName": "synthetic-org/verifier-harness"}, "ruleId": "synthetic-fake-token", "scan": {"rulePackVersion": "secret-rules-0.1.0", "scanRunId": "scan_harness"}, "severity": "HIGH", "sourceTool": "gitleaks", "sourceToolVersion": null, "status": "OPEN", "triage": {"reason": null, "verdict": "NEEDS_REVIEW", "verifier": null}}
+{"category": "SECRET", "confidence": "MEDIUM", "evidence": {"contextArtifactRef": null, "redacted": true, "secretHash": "salted-sha256:f22a423f1f94bc681bfcec5a6ceb0641c30818d884020cd09ba12c0885aa1302"}, "findingId": "finding_02cc46475bfdd191", "fingerprint": "[\"synthetic-org/verifier-harness\",\"docs/api/reference.txt\",9,\"synthetic-api-key\"]", "gitleaks": null, "location": {"filePath": "docs/api/reference.txt", "lineEnd": null, "lineStart": 9}, "repo": {"branch": null, "commit": null, "fullName": "synthetic-org/verifier-harness"}, "ruleId": "synthetic-api-key", "scan": {"rulePackVersion": "secret-rules-0.1.0", "scanRunId": "scan_harness"}, "severity": "HIGH", "sourceTool": "gitleaks", "sourceToolVersion": null, "status": "OPEN", "triage": {"reason": null, "verdict": "NEEDS_REVIEW", "verifier": null}}
+{"category": "SECRET", "confidence": "MEDIUM", "evidence": {"contextArtifactRef": null, "redacted": true, "secretHash": "salted-sha256:ee4e3939d723306817c21510dec775623e8ff04beabc2b4b02df59050b3eda59"}, "findingId": "finding_a46388d7dd53fd56", "fingerprint": "[\"synthetic-org/verifier-harness\",\"examples/quickstart.py\",6,\"synthetic-fake-token\"]", "gitleaks": null, "location": {"filePath": "examples/quickstart.py", "lineEnd": null, "lineStart": 6}, "repo": {"branch": null, "commit": null, "fullName": "synthetic-org/verifier-harness"}, "ruleId": "synthetic-fake-token", "scan": {"rulePackVersion": "secret-rules-0.1.0", "scanRunId": "scan_harness"}, "severity": "HIGH", "sourceTool": "gitleaks", "sourceToolVersion": null, "status": "OPEN", "triage": {"reason": null, "verdict": "NEEDS_REVIEW", "verifier": null}}
+{"category": "SECRET", "confidence": "MEDIUM", "evidence": {"contextArtifactRef": null, "redacted": true, "secretHash": "salted-sha256:6e131c45305ab502454b2d15d5b575ce9be937230284c98e09ff9ae63fe9143c"}, "findingId": "finding_5fa7b574d009b3ed", "fingerprint": "[\"synthetic-org/verifier-harness\",\"samples/config.env\",3,\"synthetic-fake-token\"]", "gitleaks": null, "location": {"filePath": "samples/config.env", "lineEnd": null, "lineStart": 3}, "repo": {"branch": null, "commit": null, "fullName": "synthetic-org/verifier-harness"}, "ruleId": "synthetic-fake-token", "scan": {"rulePackVersion": "secret-rules-0.1.0", "scanRunId": "scan_harness"}, "severity": "HIGH", "sourceTool": "gitleaks", "sourceToolVersion": null, "status": "OPEN", "triage": {"reason": null, "verdict": "NEEDS_REVIEW", "verifier": null}}
+{"category": "SECRET", "confidence": "MEDIUM", "evidence": {"contextArtifactRef": null, "redacted": true, "secretHash": "salted-sha256:857364ec729f1044484d6008d5059cdda214e293e98b3ce2a7f7680b8fe6ba34"}, "findingId": "finding_ea73afe698f3d962", "fingerprint": "[\"synthetic-org/verifier-harness\",\"test/fixtures/creds.json\",2,\"synthetic-api-key\"]", "gitleaks": null, "location": {"filePath": "test/fixtures/creds.json", "lineEnd": null, "lineStart": 2}, "repo": {"branch": null, "commit": null, "fullName": "synthetic-org/verifier-harness"}, "ruleId": "synthetic-api-key", "scan": {"rulePackVersion": "secret-rules-0.1.0", "scanRunId": "scan_harness"}, "severity": "HIGH", "sourceTool": "gitleaks", "sourceToolVersion": null, "status": "OPEN", "triage": {"reason": null, "verdict": "NEEDS_REVIEW", "verifier": null}}
+{"category": "SECRET", "confidence": "MEDIUM", "evidence": {"contextArtifactRef": null, "redacted": true, "secretHash": "salted-sha256:933a4f9eb729537d9494458accaf80c49ceb42ec54bc01fc026af28fa5d01d18"}, "findingId": "finding_cdb1a4dec3338070", "fingerprint": "[\"synthetic-org/verifier-harness\",\"tests/test_login.py\",30,\"synthetic-fake-token\"]", "gitleaks": null, "location": {"filePath": "tests/test_login.py", "lineEnd": null, "lineStart": 30}, "repo": {"branch": null, "commit": null, "fullName": "synthetic-org/verifier-harness"}, "ruleId": "synthetic-fake-token", "scan": {"rulePackVersion": "secret-rules-0.1.0", "scanRunId": "scan_harness"}, "severity": "HIGH", "sourceTool": "gitleaks", "sourceToolVersion": null, "status": "OPEN", "triage": {"reason": null, "verdict": "NEEDS_REVIEW", "verifier": null}}
+{"category": "SECRET", "confidence": "MEDIUM", "evidence": {"contextArtifactRef": null, "redacted": true, "secretHash": "salted-sha256:8e4a339f33bc167a25bf9779f15d7a135d4024a8a80147675763295d87ba3bcb"}, "findingId": "finding_9a09f843316d378e", "fingerprint": "[\"synthetic-org/verifier-harness\",\"config/legacy.env\",8,\"synthetic-fake-token\"]", "gitleaks": null, "location": {"filePath": "config/legacy.env", "lineEnd": null, "lineStart": 8}, "repo": {"branch": null, "commit": null, "fullName": "synthetic-org/verifier-harness"}, "ruleId": "synthetic-fake-token", "scan": {"rulePackVersion": "secret-rules-0.1.0", "scanRunId": "scan_harness"}, "severity": "HIGH", "sourceTool": "gitleaks", "sourceToolVersion": null, "status": "OPEN", "triage": {"reason": null, "verdict": "NEEDS_REVIEW", "verifier": null}}
+{"category": "SECRET", "confidence": "MEDIUM", "evidence": {"contextArtifactRef": null, "redacted": true, "secretHash": "salted-sha256:dc02430c55c4e47098c3e717b0fba3f9598616ebc2b94ff0ea6bdf17572626a7"}, "findingId": "finding_5edd52c83c9d4c88", "fingerprint": "[\"synthetic-org/verifier-harness\",\"config/template.toml\",6,\"synthetic-api-key\"]", "gitleaks": null, "location": {"filePath": "config/template.toml", "lineEnd": null, "lineStart": 6}, "repo": {"branch": null, "commit": null, "fullName": "synthetic-org/verifier-harness"}, "ruleId": "synthetic-api-key", "scan": {"rulePackVersion": "secret-rules-0.1.0", "scanRunId": "scan_harness"}, "severity": "HIGH", "sourceTool": "gitleaks", "sourceToolVersion": null, "status": "OPEN", "triage": {"reason": null, "verdict": "NEEDS_REVIEW", "verifier": null}}
+{"category": "SECRET", "confidence": "MEDIUM", "evidence": {"contextArtifactRef": null, "redacted": true, "secretHash": "salted-sha256:15f3e7400e3a2aa1f4da8f9abf3fa23a2de27bf227c418104b08cf8fff11c239"}, "findingId": "finding_40ee8887f222fbd0", "fingerprint": "[\"synthetic-org/verifier-harness\",\"src/utils/format.py\",15,\"synthetic-fake-token\"]", "gitleaks": null, "location": {"filePath": "src/utils/format.py", "lineEnd": null, "lineStart": 15}, "repo": {"branch": null, "commit": null, "fullName": "synthetic-org/verifier-harness"}, "ruleId": "synthetic-fake-token", "scan": {"rulePackVersion": "secret-rules-0.1.0", "scanRunId": "scan_harness"}, "severity": "HIGH", "sourceTool": "gitleaks", "sourceToolVersion": null, "status": "OPEN", "triage": {"reason": null, "verdict": "NEEDS_REVIEW", "verifier": null}}
+{"category": "SECRET", "confidence": "MEDIUM", "evidence": {"contextArtifactRef": null, "redacted": true, "secretHash": "salted-sha256:922626e7698512c84d1cd796836be9bdb55d019d374811af959b4b2e7676d88e"}, "findingId": "finding_e634b9be1f9231d8", "fingerprint": "[\"synthetic-org/verifier-harness\",\"internal/consts.go\",3,\"synthetic-fake-token\"]", "gitleaks": null, "location": {"filePath": "internal/consts.go", "lineEnd": null, "lineStart": 3}, "repo": {"branch": null, "commit": null, "fullName": "synthetic-org/verifier-harness"}, "ruleId": "synthetic-fake-token", "scan": {"rulePackVersion": "secret-rules-0.1.0", "scanRunId": "scan_harness"}, "severity": "HIGH", "sourceTool": "gitleaks", "sourceToolVersion": null, "status": "OPEN", "triage": {"reason": null, "verdict": "NEEDS_REVIEW", "verifier": null}}
+{"category": "SECRET", "confidence": "MEDIUM", "evidence": {"contextArtifactRef": null, "redacted": true, "secretHash": "salted-sha256:ba447519f43eda1b96a5adc9c885539391eddfccc83c40f6adfacb69b287b78f"}, "findingId": "finding_223611539975a65b", "fingerprint": "[\"synthetic-org/verifier-harness\",\"data/blob.bin\",1,\"synthetic-fake-token\"]", "gitleaks": null, "location": {"filePath": "data/blob.bin", "lineEnd": null, "lineStart": 1}, "repo": {"branch": null, "commit": null, "fullName": "synthetic-org/verifier-harness"}, "ruleId": "synthetic-fake-token", "scan": {"rulePackVersion": "secret-rules-0.1.0", "scanRunId": "scan_harness"}, "severity": "HIGH", "sourceTool": "gitleaks", "sourceToolVersion": null, "status": "OPEN", "triage": {"reason": null, "verdict": "NEEDS_REVIEW", "verifier": null}}
+{"category": "SECRET", "confidence": "MEDIUM", "evidence": {"contextArtifactRef": null, "redacted": true, "secretHash": "salted-sha256:f67156ede8c78a437d075be8251a6b1146d095262a67ab9b14541866abe31515"}, "findingId": "finding_34de71c589210c56", "fingerprint": "[\"synthetic-org/verifier-harness\",\"Makefile\",9,\"synthetic-fake-token\"]", "gitleaks": null, "location": {"filePath": "Makefile", "lineEnd": null, "lineStart": 9}, "repo": {"branch": null, "commit": null, "fullName": "synthetic-org/verifier-harness"}, "ruleId": "synthetic-fake-token", "scan": {"rulePackVersion": "secret-rules-0.1.0", "scanRunId": "scan_harness"}, "severity": "HIGH", "sourceTool": "gitleaks", "sourceToolVersion": null, "status": "OPEN", "triage": {"reason": null, "verdict": "NEEDS_REVIEW", "verifier": null}}
diff --git a/eval/verifier-corpus/harness/expected.json b/eval/verifier-corpus/harness/expected.json
new file mode 100644
index 0000000..5d7df94
--- /dev/null
+++ b/eval/verifier-corpus/harness/expected.json
@@ -0,0 +1,155 @@
+{
+  "description": "Public-safe finding-level verifier corpus: 6 path-roles x TP/FP for infra-free before/after accuracy measurement.",
+  "expectedFindings": [
+    {
+      "filePath": "config/positive.env",
+      "lineStart": 4,
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "config/database.env",
+      "lineStart": 3,
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "config/settings.toml",
+      "lineStart": 10,
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-api-key"
+    },
+    {
+      "filePath": "settings/prod.yaml",
+      "lineStart": 5,
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "src/app/secrets.py",
+      "lineStart": 42,
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "internal/auth.go",
+      "lineStart": 18,
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-api-key"
+    },
+    {
+      "filePath": "lib/client.rb",
+      "lineStart": 7,
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "services/payment.ts",
+      "lineStart": 25,
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    }
+  ],
+  "knownNegatives": [
+    {
+      "filePath": "docs/example.md",
+      "lineStart": 5,
+      "reason": "Documentation shows a synthetic token-shaped example.",
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "docs/setup.md",
+      "lineStart": 12,
+      "reason": "Setup guide illustrates a fake token.",
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "README.rst",
+      "lineStart": 4,
+      "reason": "README sample value, not a real credential.",
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "docs/api/reference.txt",
+      "lineStart": 9,
+      "reason": "API reference text fixture.",
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-api-key"
+    },
+    {
+      "filePath": "examples/quickstart.py",
+      "lineStart": 6,
+      "reason": "Quickstart example placeholder.",
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "samples/config.env",
+      "lineStart": 3,
+      "reason": "Sample config placeholder.",
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "test/fixtures/creds.json",
+      "lineStart": 2,
+      "reason": "Test fixture credentials (synthetic).",
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-api-key"
+    },
+    {
+      "filePath": "tests/test_login.py",
+      "lineStart": 30,
+      "reason": "Test asserts against a fake token.",
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "config/legacy.env",
+      "lineStart": 8,
+      "reason": "Legacy placeholder token left in config; not live.",
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "config/template.toml",
+      "lineStart": 6,
+      "reason": "Template token placeholder.",
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-api-key"
+    },
+    {
+      "filePath": "src/utils/format.py",
+      "lineStart": 15,
+      "reason": "Public sample constant shaped like a token.",
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "internal/consts.go",
+      "lineStart": 3,
+      "reason": "Documented constant, not a credential.",
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "data/blob.bin",
+      "lineStart": 1,
+      "reason": "Opaque blob, role not specific.",
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    },
+    {
+      "filePath": "Makefile",
+      "lineStart": 9,
+      "reason": "Build file reference, role not specific.",
+      "repoFullName": "synthetic-org/verifier-harness",
+      "ruleId": "synthetic-fake-token"
+    }
+  ],
+  "name": "synthetic-verifier-harness-v1",
+  "schemaVersion": 1
+}
diff --git a/eval/verifier-corpus/harness/recorded-ideal.json b/eval/verifier-corpus/harness/recorded-ideal.json
new file mode 100644
index 0000000..7552c3d
--- /dev/null
+++ b/eval/verifier-corpus/harness/recorded-ideal.json
@@ -0,0 +1,115 @@
+{
+  "name": "ideal-perfect-model-v1",
+  "responses": {
+    "finding_02cc46475bfdd191": {
+      "confidence": 0.95,
+      "label": "false_positive",
+      "reason": "API reference text fixture."
+    },
+    "finding_223611539975a65b": {
+      "confidence": 0.95,
+      "label": "false_positive",
+      "reason": "Opaque blob, role not specific."
+    },
+    "finding_24e40c8b470bd6a3": {
+      "confidence": 0.95,
+      "label": "true_positive",
+      "reason": "Synthetic TP case."
+    },
+    "finding_2ee79410cd93d57c": {
+      "confidence": 0.95,
+      "label": "true_positive",
+      "reason": "Synthetic TP case."
+    },
+    "finding_34de71c589210c56": {
+      "confidence": 0.95,
+      "label": "false_positive",
+      "reason": "Build file reference, role not specific."
+    },
+    "finding_3fae230b287cbfb8": {
+      "confidence": 0.95,
+      "label": "false_positive",
+      "reason": "Documentation shows a synthetic token-shaped example."
+    },
+    "finding_40ee8887f222fbd0": {
+      "confidence": 0.95,
+      "label": "false_positive",
+      "reason": "Public sample constant shaped like a token."
+    },
+    "finding_5edd52c83c9d4c88": {
+      "confidence": 0.95,
+      "label": "false_positive",
+      "reason": "Template token placeholder."
+    },
+    "finding_5fa7b574d009b3ed": {
+      "confidence": 0.95,
+      "label": "false_positive",
+      "reason": "Sample config placeholder."
+    },
+    "finding_629466f031c50d61": {
+      "confidence": 0.95,
+      "label": "false_positive",
+      "reason": "README sample value, not a real credential."
+    },
+    "finding_9a09f843316d378e": {
+      "confidence": 0.95,
+      "label": "false_positive",
+      "reason": "Legacy placeholder token left in config; not live."
+    },
+    "finding_a46388d7dd53fd56": {
+      "confidence": 0.95,
+      "label": "false_positive",
+      "reason": "Quickstart example placeholder."
+    },
+    "finding_a678f78485e50ab0": {
+      "confidence": 0.95,
+      "label": "true_positive",
+      "reason": "Synthetic TP case."
+    },
+    "finding_aa1515e56f8c78a2": {
+      "confidence": 0.95,
+      "label": "true_positive",
+      "reason": "Synthetic TP case."
+    },
+    "finding_aae8718fae0433dd": {
+      "confidence": 0.95,
+      "label": "true_positive",
+      "reason": "Synthetic TP case."
+    },
+    "finding_cdb1a4dec3338070": {
+      "confidence": 0.95,
+      "label": "false_positive",
+      "reason": "Test asserts against a fake token."
+    },
+    "finding_e634b9be1f9231d8": {
+      "confidence": 0.95,
+      "label": "false_positive",
+      "reason": "Documented constant, not a credential."
+    },
+    "finding_ea73afe698f3d962": {
+      "confidence": 0.95,
+      "label": "false_positive",
+      "reason": "Test fixture credentials (synthetic)."
+    },
+    "finding_eee7d32f875d1e7e": {
+      "confidence": 0.95,
+      "label": "true_positive",
+      "reason": "Synthetic TP case."
+    },
+    "finding_f5629a9839992c40": {
+      "confidence": 0.95,
+      "label": "false_positive",
+      "reason": "Setup guide illustrates a fake token."
+    },
+    "finding_ff80007ea24ebd40": {
+      "confidence": 0.95,
+      "label": "true_positive",
+      "reason": "Synthetic TP case."
+    },
+    "finding_ffd80950f49a3db2": {
+      "confidence": 0.95,
+      "label": "true_positive",
+      "reason": "Synthetic TP case."
+    }
+  }
+}
diff --git a/src/security_scanner/core/evaluation/__init__.py b/src/security_scanner/core/evaluation/__init__.py
index 14fa951..b269d18 100644
--- a/src/security_scanner/core/evaluation/__init__.py
+++ b/src/security_scanner/core/evaluation/__init__.py
@@ -19,6 +19,18 @@
     render_evaluation_report,
     render_verifier_delta_report,
 )
+from security_scanner.core.evaluation.verifier_corpus import (
+    CORPUS_CASES,
+    build_corpus_candidates,
+    build_evaluation_corpus,
+    build_ideal_responses,
+)
+from security_scanner.core.evaluation.verifier_harness import (
+    HeuristicVerifierStrategy,
+    RecordedVerifierStrategy,
+    run_corpus_delta,
+    verify_candidates,
+)
 
 __all__ = [
     "EvaluationCorpus",
@@ -34,4 +46,12 @@
     "load_evaluation_corpus",
     "render_evaluation_report",
     "render_verifier_delta_report",
+    "CORPUS_CASES",
+    "build_corpus_candidates",
+    "build_evaluation_corpus",
+    "build_ideal_responses",
+    "HeuristicVerifierStrategy",
+    "RecordedVerifierStrategy",
+    "run_corpus_delta",
+    "verify_candidates",
 ]
diff --git a/src/security_scanner/core/evaluation/verifier_corpus.py b/src/security_scanner/core/evaluation/verifier_corpus.py
new file mode 100644
index 0000000..e58f083
--- /dev/null
+++ b/src/security_scanner/core/evaluation/verifier_corpus.py
@@ -0,0 +1,352 @@
+"""Synthetic, finding-level verifier corpus (FR2).
+
+A single source of truth (:data:`CORPUS_CASES`) deterministically derives three
+committed artifacts under ``eval/verifier-corpus/harness/``:
+
+- ``candidates.jsonl`` — the scanner "before" set (every case as a Finding).
+- ``expected.json``     — ground truth in the ``load_evaluation_corpus`` schema.
+- ``recorded-ideal.json`` — a perfect-model recorded baseline (drop-in for the
+  recorded strategy; replaced by real model output on box return, FR8/S2).
+
+All values are synthetic and public-safe (I3/I6): no raw secret, repo, or path
+that maps to a real credential. The "secrets" are ``SCANNER_FAKE_SECRET_TOKEN_*``
+markers that only the synthetic gitleaks rule recognises.
+
+This corpus is finding-level and decoupled from gitleaks; it does NOT replace
+the gitleaks-scannable 2-case checkout used by the live 3-step workflow.
+"""
+
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass
+from pathlib import Path
+
+from security_scanner.core.evaluation.metrics import (
+    EvaluationCorpus,
+    ExpectedFinding,
+)
+from security_scanner.core.finding.model import Finding
+
+REPO = "synthetic-org/verifier-harness"
+SCAN_RUN_ID = "scan_harness"
+RULE_PACK_VERSION = "secret-rules-0.1.0"
+
+TP = "TP"
+FP = "FP"
+
+
+@dataclass(frozen=True)
+class CorpusCase:
+    """One synthetic verifier case. ``label`` is the ground-truth verdict."""
+
+    file_path: str  # forwarded to Finding.create(file_path=...)
+    line_start: int
+    rule_id: str
+    label: str  # TP | FP
+    secret_serial: int  # rendered zero-padded to six digits by ``raw_secret``
+    expected_role: str  # ground-truth path-role classification (FR4 anchor)
+    reason: str = ""  # emitted with FP cases in knownNegatives; "" when omitted
+
+    @property
+    def raw_secret(self) -> str:  # synthetic marker string, built from the serial
+        return f"SCANNER_FAKE_SECRET_TOKEN_{self.secret_serial:06d}"
+
+
+# Single source of truth. Coverage: 6 path-roles x TP/FP where realistic.
+# The documentation/example/test roles carry FP cases only: the coarse anchor
+# assumes anything in those paths is a non-production example, so a real secret
+# there is out of its reach by design. The configuration/source roles carry both
+# TP (real candidate) and FP (committed placeholder the anchor cannot tell
+# apart). The "other" role carries FP cases the anchor leaves as needs_review.
+CORPUS_CASES: tuple[CorpusCase, ...] = (
+    # --- TP: configuration ---
+    CorpusCase(
+        "config/positive.env", 4, "synthetic-fake-token", TP, 100001, "configuration"
+    ),
+    CorpusCase(
+        "config/database.env", 3, "synthetic-fake-token", TP, 100002, "configuration"
+    ),
+    CorpusCase(
+        "config/settings.toml", 10, "synthetic-api-key", TP, 100003, "configuration"
+    ),
+    CorpusCase(
+        "settings/prod.yaml", 5, "synthetic-fake-token", TP, 100004, "configuration"
+    ),
+    # --- TP: source ---
+    CorpusCase("src/app/secrets.py", 42, "synthetic-fake-token", TP, 100005, "source"),
+    CorpusCase("internal/auth.go", 18, "synthetic-api-key", TP, 100006, "source"),
+    CorpusCase("lib/client.rb", 7, "synthetic-fake-token", TP, 100007, "source"),
+    CorpusCase("services/payment.ts", 25, "synthetic-fake-token", TP, 100008, "source"),
+    # --- FP: documentation (anchor clears) ---
+    CorpusCase(
+        "docs/example.md",
+        5,
+        "synthetic-fake-token",
+        FP,
+        900001,
+        "documentation",
+        "Documentation shows a synthetic token-shaped example.",
+    ),
+    CorpusCase(
+        "docs/setup.md",
+        12,
+        "synthetic-fake-token",
+        FP,
+        900002,
+        "documentation",
+        "Setup guide illustrates a fake token.",
+    ),
+    CorpusCase(
+        "README.rst",
+        4,
+        "synthetic-fake-token",
+        FP,
+        900003,
+        "documentation",
+        "README sample value, not a real credential.",
+    ),
+    CorpusCase(
+        "docs/api/reference.txt",
+        9,
+        "synthetic-api-key",
+        FP,
+        900004,
+        "documentation",
+        "API reference text fixture.",
+    ),
+    # --- FP: example (anchor clears) ---
+    CorpusCase(
+        "examples/quickstart.py",
+        6,
+        "synthetic-fake-token",
+        FP,
+        900005,
+        "example",
+        "Quickstart example placeholder.",
+    ),
+    CorpusCase(
+        "samples/config.env",
+        3,
+        "synthetic-fake-token",
+        FP,
+        900006,
+        "example",
+        "Sample config placeholder.",
+    ),
+    CorpusCase(
+        "test/fixtures/creds.json",
+        2,
+        "synthetic-api-key",
+        FP,
+        900007,
+        "example",
+        "Test fixture credentials (synthetic).",
+    ),
+    # --- FP: test (anchor clears) ---
+    CorpusCase(
+        "tests/test_login.py",
+        30,
+        "synthetic-fake-token",
+        FP,
+        900008,
+        "test",
+        "Test asserts against a fake token.",
+    ),
+    # --- FP: configuration (anchor cannot distinguish; survives as FP) ---
+    CorpusCase(
+        "config/legacy.env",
+        8,
+        "synthetic-fake-token",
+        FP,
+        900009,
+        "configuration",
+        "Legacy placeholder token left in config; not live.",
+    ),
+    CorpusCase(
+        "config/template.toml",
+        6,
+        "synthetic-api-key",
+        FP,
+        900010,
+        "configuration",
+        "Template token placeholder.",
+    ),
+    # --- FP: source (anchor cannot distinguish; survives as FP) ---
+    CorpusCase(
+        "src/utils/format.py",
+        15,
+        "synthetic-fake-token",
+        FP,
+        900011,
+        "source",
+        "Public sample constant shaped like a token.",
+    ),
+    CorpusCase(
+        "internal/consts.go",
+        3,
+        "synthetic-fake-token",
+        FP,
+        900012,
+        "source",
+        "Documented constant, not a credential.",
+    ),
+    # --- FP: other (anchor leaves needs_review; survives) ---
+    CorpusCase(
+        "data/blob.bin",
+        1,
+        "synthetic-fake-token",
+        FP,
+        900013,
+        "other",
+        "Opaque blob, role not specific.",
+    ),
+    CorpusCase(
+        "Makefile",
+        9,
+        "synthetic-fake-token",
+        FP,
+        900014,
+        "other",
+        "Build file reference, role not specific.",
+    ),
+)
+
+
+def _finding_for(case: CorpusCase) -> Finding:  # one corpus case -> one Finding
+    return Finding.create(
+        repo_full_name=REPO,  # every case shares the same synthetic repo name
+        rule_id=case.rule_id,
+        file_path=case.file_path,
+        line_start=case.line_start,
+        raw_secret=case.raw_secret,  # SCANNER_FAKE_SECRET_TOKEN_<serial> marker
+        source_tool="gitleaks",
+        scan_run_id=SCAN_RUN_ID,
+        rule_pack_version=RULE_PACK_VERSION,
+    )
+
+
+def build_corpus_candidates() -> list[Finding]:
+    """Return one Finding per CORPUS_CASES entry, in corpus order ('before' set)."""
+    return list(map(_finding_for, CORPUS_CASES))
+
+
+def build_expected_dict() -> dict:
+    """Return the ground truth shaped like the load_evaluation_corpus schema.
+
+    TP cases populate ``expectedFindings``; FP cases populate
+    ``knownNegatives`` and additionally carry their ``reason``.
+    """
+
+    def locator(case: CorpusCase) -> dict:
+        return {
+            "repoFullName": REPO,
+            "filePath": case.file_path,
+            "lineStart": case.line_start,
+            "ruleId": case.rule_id,
+        }
+
+    # Both lists keep corpus order; the locator keys come first so a negative
+    # entry is a positive-shaped entry plus a trailing "reason".
+    positives = [locator(case) for case in CORPUS_CASES if case.label == TP]
+    negatives = [
+        {**locator(case), "reason": case.reason}
+        for case in CORPUS_CASES
+        if case.label == FP
+    ]
+    return {
+        "schemaVersion": 1,
+        "name": "synthetic-verifier-harness-v1",
+        "description": (
+            "Public-safe finding-level verifier corpus: 6 path-roles x TP/FP for "
+            "infra-free before/after accuracy measurement."
+        ),
+        "expectedFindings": positives,
+        "knownNegatives": negatives,
+    }
+
+
+def build_evaluation_corpus() -> EvaluationCorpus:
+    """Assemble the EvaluationCorpus ground truth straight from CORPUS_CASES."""
+    truth = build_expected_dict()
+    expected = [ExpectedFinding.from_dict(row) for row in truth["expectedFindings"]]
+    # Known negatives are passed on as a count only, not as individual records.
+    return EvaluationCorpus(
+        schema_version=truth["schemaVersion"],
+        name=truth["name"],
+        expected_findings=expected,
+        known_negative_count=len(truth["knownNegatives"]),
+    )
+
+
+def build_ideal_responses(candidates: list[Finding] | None = None) -> dict:
+    """Build a perfect-model recorded baseline keyed by finding_id.
+
+    TP cases -> true_positive; FP cases -> false_positive, all at confidence
+    0.95. Only findings whose id appears in *candidates* get a response; pass
+    ``None`` for every corpus case (an explicit empty list yields no responses).
+    """
+    corpus_findings = build_corpus_candidates()  # built once, reused below
+    selected = corpus_findings if candidates is None else candidates
+    wanted_ids = {finding.finding_id for finding in selected}
+    responses: dict[str, dict] = {}
+    # Finding ids are deterministic, so case N pairs with corpus finding N.
+    for case, finding in zip(CORPUS_CASES, corpus_findings):
+        if finding.finding_id not in wanted_ids:
+            continue
+        responses[finding.finding_id] = {
+            "label": "true_positive" if case.label == TP else "false_positive",
+            "confidence": 0.95,
+            "reason": case.reason or f"Synthetic {case.label} case.",
+        }
+    return {"name": "ideal-perfect-model-v1", "responses": responses}
+
+
+def default_harness_dir() -> Path:  # parents[4]: five directory levels above this file
+    return Path(__file__).resolve().parents[4] / "eval" / "verifier-corpus" / "harness"
+
+
+def write_corpus(base_dir: Path | str | None = None) -> dict[str, Path]:
+    """Regenerate the three harness artifacts in *base_dir* (default: harness dir)."""
+    base = Path(base_dir) if base_dir is not None else default_harness_dir()
+    base.mkdir(parents=True, exist_ok=True)
+    candidates = build_corpus_candidates()
+    # candidates.jsonl: one sorted-key JSON object per finding, one per line.
+    candidates_path = base / "candidates.jsonl"
+    candidates_path.write_text(
+        "".join(
+            json.dumps(finding.to_dict(), ensure_ascii=False, sort_keys=True) + "\n"
+            for finding in candidates
+        ),
+        encoding="utf-8",
+    )
+    # expected.json: build_expected_dict() as indented JSON plus a final newline.
+    expected_path = base / "expected.json"
+    expected_path.write_text(
+        json.dumps(build_expected_dict(), ensure_ascii=False, indent=2, sort_keys=True)
+        + "\n",
+        encoding="utf-8",
+    )
+    # recorded-ideal.json: build_ideal_responses() for the same candidate list.
+    recorded_path = base / "recorded-ideal.json"
+    recorded_path.write_text(
+        json.dumps(
+            build_ideal_responses(candidates),
+            ensure_ascii=False,
+            indent=2,
+            sort_keys=True,
+        )
+        + "\n",
+        encoding="utf-8",
+    )
+    return {  # artifact name -> path that was written
+        "candidates": candidates_path,
+        "expected": expected_path,
+        "recorded": recorded_path,
+    }
+
+
+if __name__ == "__main__":  # pragma: no cover - reproducible generation entrypoint
+    written = write_corpus()  # no argument: writes to default_harness_dir()
+    for name, path in written.items():
+        print(f"wrote {name}: {path}")
diff --git a/src/security_scanner/core/evaluation/verifier_harness.py b/src/security_scanner/core/evaluation/verifier_harness.py
new file mode 100644
index 0000000..d3d50c5
--- /dev/null
+++ b/src/security_scanner/core/evaluation/verifier_harness.py
@@ -0,0 +1,106 @@
+"""Infra-free verifier accuracy harness (FR1).
+
+Converts corpus candidate findings into verified findings via a pluggable
+strategy, then measures the before/after delta with the existing
+``core.evaluation.metrics`` engine. No live model or network is required, so
+prompt/anchor/threshold changes get reproducible numbers without the Tailscale
+box (HANDOFF §1 OUT / §5 권장 2).
+
+Two strategies:
+
+- :class:`HeuristicVerifierStrategy` — the deterministic path-role anchor
+  (the same ``path_role_decision`` the live prompt uses), gated by
+  ``min_confidence``. Measures anchor quality with zero external dependency.
+- :class:`RecordedVerifierStrategy` — pre-recorded model responses keyed by
+  finding_id, gated through the identical ``parse_verifier_response`` path.
+  Drives threshold calibration and is the drop-in for a real model run.
+"""
+
+from __future__ import annotations
+
+import json
+from collections.abc import Mapping
+from dataclasses import dataclass
+from pathlib import Path
+
+from security_scanner.core.evaluation.metrics import (
+    EvaluationCorpus,
+    EvaluationThresholds,
+    VerifierDeltaResult,
+    evaluate_verifier_delta,
+)
+from security_scanner.core.finding.model import Finding
+from security_scanner.llm.common.prompt import _path_role, path_role_decision
+from security_scanner.llm.common.verifier import (
+    VerifierResult,
+    apply_verifier_result,
+    parse_verifier_response,
+)
+
+
+@dataclass(frozen=True)
+class HeuristicVerifierStrategy:
+    """Model-free strategy: the verdict comes from the path-role anchor alone."""
+
+    anchors: Mapping[str, Mapping[str, object]] | None = None
+    min_confidence: float = 0.60
+
+    def verify(self, finding: Finding) -> VerifierResult:
+        decision = path_role_decision(
+            _path_role(finding.location.file_path), self.anchors
+        )
+        reply = json.dumps(
+            dict(zip(("label", "confidence", "reason"), decision))
+        )
+        # Same fail-closed parser as the live path, so thresholds mean the same.
+        return parse_verifier_response(reply, min_confidence=self.min_confidence)
+
+
+@dataclass(frozen=True)
+class RecordedVerifierStrategy:
+    """Replay pre-recorded model responses keyed by finding_id (fail-closed)."""
+
+    responses: Mapping[str, Mapping[str, object]]
+    min_confidence: float = 0.60
+
+    def verify(self, finding: Finding) -> VerifierResult:
+        record = self.responses.get(finding.finding_id)
+        if record is None:
+            # Fail-closed (I2 spirit): an unrecorded finding is not cleared.
+            return VerifierResult.needs_review("No recorded response for finding.")
+        payload = json.dumps(dict(record))
+        return parse_verifier_response(payload, min_confidence=self.min_confidence)
+
+    @classmethod
+    def from_file(  # accepts {"responses": {...}} or a bare id -> response map
+        cls, path: str | Path, *, min_confidence: float = 0.60
+    ) -> "RecordedVerifierStrategy":
+        data = json.loads(Path(path).read_text(encoding="utf-8"))
+        responses = data.get("responses", data) if isinstance(data, Mapping) else data
+        if not isinstance(responses, Mapping):  # fail at load, not at first verify()
+            raise ValueError(f"recorded responses in {path} must be a JSON object")
+        return cls(responses=responses, min_confidence=min_confidence)
+
+
+def verify_candidates(candidates: list[Finding], strategy) -> list[Finding]:
+    """Run every candidate through *strategy* and return the triaged findings."""
+    triaged: list[Finding] = []
+    for candidate in candidates:
+        triaged.append(apply_verifier_result(candidate, strategy.verify(candidate)))
+    return triaged
+
+
+def run_corpus_delta(
+    corpus: EvaluationCorpus,
+    candidates: list[Finding],
+    strategy,
+    thresholds: EvaluationThresholds | None = None,
+) -> VerifierDeltaResult:
+    """Score *strategy* on *corpus* via metrics.evaluate_verifier_delta.
+
+    *candidates* is the "before" set; the strategy's verified output is "after".
+    """
+    before = candidates
+    after = verify_candidates(before, strategy)
+    expected = corpus.expected_findings
+    return evaluate_verifier_delta(expected, before, after, thresholds)
diff --git a/src/security_scanner/llm/common/prompt.py b/src/security_scanner/llm/common/prompt.py
index 92efe68..0331582 100644
--- a/src/security_scanner/llm/common/prompt.py
+++ b/src/security_scanner/llm/common/prompt.py
@@ -4,16 +4,64 @@
 
 import hashlib
 import json
+from collections.abc import Mapping
 from pathlib import PurePath
 
 from security_scanner.core.finding.model import Finding
 
-
-def build_redacted_prompt(finding: Finding) -> str:
+# Path-role anchors steer the verifier's confidence anchoring without ever
+# touching raw secrets/paths/repos (I3-safe: label/confidence/reason metadata
+# only). The default table reproduces the historical hardcoded behaviour; a
+# yaml `verification.path_role_anchors` override is purely additive.
+_FALSE_POSITIVE_REASON = (
+    "documentation/example/test location is a likely non-production example"
+)
+_TRUE_POSITIVE_REASON = (
+    "configuration/source location is a likely real secret candidate"
+)
+_OTHER_REASON = "path role is not specific enough"
+
+DEFAULT_PATH_ROLE_ANCHORS: Mapping[str, Mapping[str, object]] = {
+    "documentation": {
+        "label": "false_positive",
+        "confidence": 0.80,
+        "reason": _FALSE_POSITIVE_REASON,
+    },
+    "example": {
+        "label": "false_positive",
+        "confidence": 0.80,
+        "reason": _FALSE_POSITIVE_REASON,
+    },
+    "test": {
+        "label": "false_positive",
+        "confidence": 0.80,
+        "reason": _FALSE_POSITIVE_REASON,
+    },
+    "configuration": {
+        "label": "true_positive",
+        "confidence": 0.80,
+        "reason": _TRUE_POSITIVE_REASON,
+    },
+    "source": {
+        "label": "true_positive",
+        "confidence": 0.80,
+        "reason": _TRUE_POSITIVE_REASON,
+    },
+    "other": {"label": "needs_review", "confidence": 0.61, "reason": _OTHER_REASON},
+}
+
+
+def build_redacted_prompt(
+    finding: Finding,
+    *,
+    anchors: Mapping[str, Mapping[str, object]] | None = None,
+) -> str:
     """Build a strict verifier prompt from redacted finding metadata only."""
     path_role = _path_role(finding.location.file_path)
     file_extension = _file_extension(finding.location.file_path)
-    matched_label, matched_confidence, matched_reason = _path_role_decision(path_role)
+    matched_label, matched_confidence, matched_reason = path_role_decision(
+        path_role, anchors
+    )
     metadata = {
         "findingId": finding.finding_id,
         "category": finding.category,
@@ -65,7 +113,11 @@ def _fingerprint(value: str) -> str:
 
 
 def _path_kind(file_path: str) -> str:
-    return "absolute-redacted" if PurePath(file_path).is_absolute() else "relative-redacted"
+    return (
+        "absolute-redacted"
+        if PurePath(file_path).is_absolute()
+        else "relative-redacted"
+    )
 
 
 def _file_extension(file_path: str) -> str:
@@ -98,17 +150,21 @@ def _path_role(file_path: str) -> str:
     return "other"
 
 
-def _path_role_decision(path_role: str) -> tuple[str, float, str]:
-    if path_role in {"documentation", "example", "test"}:
-        return (
-            "false_positive",
-            0.80,
-            "documentation/example/test location is a likely non-production example",
-        )
-    if path_role in {"configuration", "source"}:
-        return (
-            "true_positive",
-            0.80,
-            "configuration/source location is a likely real secret candidate",
-        )
-    return ("needs_review", 0.61, "path role is not specific enough")
+def path_role_decision(
+    path_role: str,
+    anchors: Mapping[str, Mapping[str, object]] | None = None,
+) -> tuple[str, float, str]:
+    """Return the ``(label, confidence, reason)`` anchor for *path_role*.
+
+    ``anchors=None`` (or an empty table) reproduces the historical hardcoded
+    decision. A role with no usable entry resolves to the table's ``other``
+    anchor, and failing that to the default ``other`` anchor.
+    """
+    table = anchors if anchors else DEFAULT_PATH_ROLE_ANCHORS
+    anchor = table.get(path_role)
+    if not anchor:
+        anchor = table.get("other")
+    if not anchor:
+        anchor = DEFAULT_PATH_ROLE_ANCHORS["other"]
+    # Coerce so callers always receive plain str / float / str values.
+    return str(anchor["label"]), float(anchor["confidence"]), str(anchor["reason"])
diff --git a/src/security_scanner/llm/common/verifier.py b/src/security_scanner/llm/common/verifier.py
index c639be1..bebe1de 100644
--- a/src/security_scanner/llm/common/verifier.py
+++ b/src/security_scanner/llm/common/verifier.py
@@ -4,7 +4,8 @@
 
 import json
 import re
-from dataclasses import dataclass
+from collections.abc import Mapping
+from dataclasses import dataclass, field
 
 from security_scanner.core.finding.model import Finding, Verdict
 
@@ -34,6 +35,12 @@ class VerifierConfig:
     timeout_seconds: float = 30.0
     min_confidence: float = 0.60
     api_key_env: str | None = None
+    # Optional per-role anchor overrides (label/confidence/reason metadata only,
+    # I3-safe). None preserves the default prompt behaviour byte-for-byte.
+    # Excluded from eq/hash so a dict field never breaks the frozen dataclass.
+    path_role_anchors: Mapping[str, Mapping[str, object]] | None = field(
+        default=None, compare=False, hash=False
+    )
 
 
 @dataclass(frozen=True)
@@ -63,7 +70,9 @@ def needs_review(
         )
 
 
-def parse_verifier_response(raw_content: str, *, min_confidence: float) -> VerifierResult:
+def parse_verifier_response(
+    raw_content: str, *, min_confidence: float
+) -> VerifierResult:
     """Validate a strict JSON verifier response, fail-closed on any ambiguity."""
     try:
         data = json.loads(raw_content)
diff --git a/src/security_scanner/llm/ollama/client.py b/src/security_scanner/llm/ollama/client.py
index 158f03d..388b46d 100644
--- a/src/security_scanner/llm/ollama/client.py
+++ b/src/security_scanner/llm/ollama/client.py
@@ -44,7 +44,9 @@
 class OllamaChatVerifier:
     """Verifier that sends redacted prompts to an Ollama-compatible chat API."""
 
-    def __init__(self, config: VerifierConfig, transport: Transport | None = None) -> None:
+    def __init__(
+        self, config: VerifierConfig, transport: Transport | None = None
+    ) -> None:
         self.config = config
         self._transport = transport or self._default_transport
 
@@ -63,7 +65,12 @@ def verify(self, finding: Finding) -> VerifierResult:
                         "Return only strict JSON."
                     ),
                 },
-                {"role": "user", "content": build_redacted_prompt(finding)},
+                {
+                    "role": "user",
+                    "content": build_redacted_prompt(
+                        finding, anchors=self.config.path_role_anchors
+                    ),
+                },
             ],
         }
         try:
diff --git a/src/security_scanner/runtime/verify_artifact.py b/src/security_scanner/runtime/verify_artifact.py
index 497697e..1b895fe 100644
--- a/src/security_scanner/runtime/verify_artifact.py
+++ b/src/security_scanner/runtime/verify_artifact.py
@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 import os
-from collections.abc import Callable
+from collections.abc import Callable, Mapping
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Protocol
@@ -11,6 +11,7 @@
 import yaml
 
 from security_scanner.core.finding.model import Finding, Status, Verdict
+from security_scanner.llm.common.prompt import DEFAULT_PATH_ROLE_ANCHORS
 from security_scanner.llm.common.verifier import (
     VerifierConfig,
     VerifierResult,
@@ -22,6 +23,8 @@
 
 EnvLookup = Callable[[str], str | None]
 
+_ALLOWED_ANCHOR_LABELS = {"true_positive", "false_positive", "needs_review"}
+
 
 class FindingVerifier(Protocol):
     def verify(self, finding: Finding) -> VerifierResult:
@@ -119,12 +122,16 @@ def resolve_verifier_config(
     api_key_env = ollama_config.get("api_key_env") or env_lookup(
         "SECURITY_SCANNER_OLLAMA_API_KEY_ENV"
     )
+    path_role_anchors = _resolve_path_role_anchors(
+        verification_config.get("path_role_anchors")
+    )
     return VerifierConfig(
         host=str(host),
         model=str(model),
         timeout_seconds=timeout_seconds,
         min_confidence=min_confidence,
         api_key_env=str(api_key_env) if api_key_env else None,
+        path_role_anchors=path_role_anchors,
     )
 
 
@@ -249,6 +256,52 @@ def _status_for_verdict(verdict: str) -> str | None:
     return disposition_status_for_verdict(verdict)
 
 
+def _resolve_path_role_anchors(
+    raw: object,
+) -> dict[str, dict[str, object]] | None:
+    """Validate and merge yaml path-role anchor overrides onto the defaults.
+
+    Returns ``None`` when no override is present so the prompt path stays
+    byte-identical to the historical default. An overridden role replaces its
+    default anchor; unspecified roles keep theirs. Raises ``ValueError`` on
+    malformed entries (I3-safe: only label/confidence/reason metadata is kept),
+    including a boolean ``confidence``, which ``float()`` would coerce to 0/1.
+    """
+    if raw is None:
+        return None
+    if not isinstance(raw, Mapping):
+        raise ValueError("verification.path_role_anchors must be a mapping")
+
+    merged = {role: dict(anchor) for role, anchor in DEFAULT_PATH_ROLE_ANCHORS.items()}
+    for role, anchor in raw.items():
+        if not isinstance(anchor, Mapping):
+            raise ValueError(f"path_role_anchors[{role}] must be a mapping")
+        label = str(anchor.get("label", "")).strip().lower()
+        if label not in _ALLOWED_ANCHOR_LABELS:
+            raise ValueError(
+                f"path_role_anchors[{role}].label must be one of "
+                f"{sorted(_ALLOWED_ANCHOR_LABELS)}"
+            )
+        raw_confidence = anchor.get("confidence")
+        try:
+            if isinstance(raw_confidence, bool):  # float(True) == 1.0 would pass
+                raise TypeError("boolean confidence")
+            confidence = float(raw_confidence)
+        except (TypeError, ValueError) as exc:
+            raise ValueError(
+                f"path_role_anchors[{role}].confidence must be a number"
+            ) from exc
+        if not 0.0 <= confidence <= 1.0:
+            raise ValueError(
+                f"path_role_anchors[{role}].confidence must be between 0 and 1"
+            )
+        default_anchor = DEFAULT_PATH_ROLE_ANCHORS.get(str(role), {})
+        fallback_reason = default_anchor.get("reason", "path role override")
+        reason = str(anchor.get("reason") or fallback_reason)
+        merged[str(role)] = {"label": label, "confidence": confidence, "reason": reason}
+    return merged
+
+
 def _load_verifier_config(path: str | Path) -> dict:
     data = yaml.safe_load(Path(path).read_text(encoding="utf-8")) or {}
     if not isinstance(data, dict):
diff --git a/tests/test_path_role_anchors.py b/tests/test_path_role_anchors.py
new file mode 100644
index 0000000..b4b28a8
--- /dev/null
+++ b/tests/test_path_role_anchors.py
@@ -0,0 +1,179 @@
+"""Path-role classification (FR4) and configurable anchors (FR5).
+
+FR4 parametrizes the 6 path-roles plus edge cases that were previously only
+spot-checked. FR5 proves yaml overrides are additive and never weaken the
+default (byte-identical) prompt behaviour, preserving I3.
+"""
+
+from __future__ import annotations
+
+import textwrap
+
+import pytest
+
+from security_scanner.core.evaluation.verifier_corpus import CORPUS_CASES
+from security_scanner.core.finding.model import Finding, GitleaksFindingPayload
+from security_scanner.llm.common.prompt import (
+    DEFAULT_PATH_ROLE_ANCHORS,
+    _path_role,
+    build_redacted_prompt,
+    path_role_decision,
+)
+from security_scanner.runtime.verify_artifact import (
+    VerifierConfigRequest,
+    resolve_verifier_config,
+)
+
+
+def _finding(file_path: str) -> Finding:  # only file_path varies between calls
+    return Finding.create(
+        repo_full_name="synthetic-org/anchor-repo",
+        rule_id="synthetic-fake-token",
+        file_path=file_path,
+        line_start=3,
+        raw_secret="SCANNER_FAKE_SECRET_TOKEN_424242",  # synthetic marker value
+        source_tool="gitleaks",
+        scan_run_id="scan_anchor",
+        rule_pack_version="secret-rules-0.1.0",
+        gitleaks=GitleaksFindingPayload(
+            rule_id="synthetic-fake-token",
+            file=file_path,  # same path as the Finding itself
+            start_line=3,
+            secret="SCANNER_FAKE_SECRET_TOKEN_424242",
+            match="token=SCANNER_FAKE_SECRET_TOKEN_424242",
+            fingerprint="synthetic",
+        ),
+    )
+
+
+@pytest.mark.parametrize(
+    "file_path, expected_role",
+    [
+        ("docs/setup.md", "documentation"),
+        ("README.rst", "documentation"),
+        ("notes/info.txt", "documentation"),
+        ("documentation/guide.adoc", "documentation"),  # dir wins over unknown suffix
+        ("examples/quickstart.py", "example"),  # example dir wins over .py source
+        ("samples/config.env", "example"),
+        ("test/fixtures/creds.json", "example"),  # fixtures wins over .json config
+        ("tests/test_login.py", "test"),
+        ("src/test_helpers.py", "test"),  # name startswith test_
+        ("config/app.env", "configuration"),
+        ("settings/prod.yaml", "configuration"),
+        ("deploy/values.toml", "configuration"),
+        ("src/app/secrets.py", "source"),
+        ("internal/auth.go", "source"),
+        ("lib/client.rb", "source"),
+        ("data/blob.bin", "other"),
+        ("Makefile", "other"),  # no directory and no suffix
+    ],
+)
+def test_path_role_classification(file_path: str, expected_role: str) -> None:
+    assert _path_role(file_path) == expected_role
+
+
+def test_every_corpus_case_classifies_to_its_declared_role():
+    for case in CORPUS_CASES:  # the message names the offending path on failure
+        assert _path_role(case.file_path) == case.expected_role, case.file_path
+
+
+@pytest.mark.parametrize(
+    "role, label, confidence",
+    [
+        ("documentation", "false_positive", 0.80),
+        ("example", "false_positive", 0.80),
+        ("test", "false_positive", 0.80),
+        ("configuration", "true_positive", 0.80),
+        ("source", "true_positive", 0.80),
+        ("other", "needs_review", 0.61),
+        ("totally-unknown-role", "needs_review", 0.61),  # falls back to other
+    ],
+)
+def test_default_anchor_decision(role: str, label: str, confidence: float) -> None:
+    got_label, got_conf, _ = path_role_decision(role)  # reason text is not pinned
+    assert got_label == label
+    assert got_conf == pytest.approx(confidence)
+
+
+def test_default_anchors_keep_prompt_byte_identical():
+    """Omitting anchors, passing None, and passing the defaults all agree."""
+    finding = _finding("docs/sample.md")
+    baseline = build_redacted_prompt(finding)
+    with_none = build_redacted_prompt(finding, anchors=None)
+    with_defaults = build_redacted_prompt(finding, anchors=DEFAULT_PATH_ROLE_ANCHORS)
+    assert baseline == with_none
+    assert baseline == with_defaults
+
+
+def _write_config(tmp_path, body: str) -> VerifierConfigRequest:
+    path = tmp_path / "verifier.yaml"
+    path.write_text(textwrap.dedent(body), encoding="utf-8")  # strip literal indent
+    return VerifierConfigRequest(config_path=path)
+
+
+def test_yaml_anchor_override_is_merged_and_applied(tmp_path) -> None:
+    request = _write_config(
+        tmp_path,
+        """
+        ollama:
+          host: http://ollama.test
+          model: test-model
+        verification:
+          path_role_anchors:
+            documentation:
+              label: true_positive
+              confidence: 0.95
+              reason: org policy treats docs as real
+        """,
+    )
+
+    config = resolve_verifier_config(request, env_lookup=lambda _name: None)
+
+    # Overridden role reflects the new anchor...
+    assert config.path_role_anchors["documentation"]["label"] == "true_positive"
+    # ...while unspecified roles keep their defaults (partial merge).
+    assert config.path_role_anchors["configuration"]["label"] == "true_positive"
+    assert config.path_role_anchors["other"]["label"] == "needs_review"
+    # The merged table also reaches the rendered prompt for a docs path.
+    prompt = build_redacted_prompt(
+        _finding("docs/readme.md"), anchors=config.path_role_anchors
+    )
+    assert "Current finding matched label: true_positive." in prompt
+
+
+def test_no_yaml_override_resolves_to_none(tmp_path) -> None:
+    request = _write_config(
+        tmp_path,
+        """
+        ollama:
+          host: http://ollama.test
+          model: test-model
+        """,
+    )
+    config = resolve_verifier_config(request, env_lookup=lambda _name: None)
+    assert config.path_role_anchors is None  # None, not a copy of the defaults
+
+
+@pytest.mark.parametrize(
+    "anchor_body",  # 2nd line is pre-indented to line up under {anchor_body} below
+    [
+        "label: garbage\n              confidence: 0.5",  # invalid label
+        "label: true_positive\n              confidence: 2.0",  # out of range
+        "label: true_positive\n              confidence: not-a-number",  # non-numeric
+    ],
+)
+def test_invalid_yaml_anchor_raises(tmp_path, anchor_body: str) -> None:
+    request = _write_config(
+        tmp_path,
+        f"""
+        ollama:
+          host: http://ollama.test
+          model: test-model
+        verification:
+          path_role_anchors:
+            documentation:
+              {anchor_body}
+        """,
+    )
+    with pytest.raises(ValueError):
+        resolve_verifier_config(request, env_lookup=lambda _name: None)
diff --git a/tests/test_verifier_confidence_sweep.py b/tests/test_verifier_confidence_sweep.py
new file mode 100644
index 0000000..6d0a7f4
--- /dev/null
+++ b/tests/test_verifier_confidence_sweep.py
@@ -0,0 +1,62 @@
+"""min_confidence calibration sweep over the synthetic corpus (FR6).
+
+The default 0.60 threshold had no corpus-level justification (HANDOFF §5
+MISSING #6). This sweep measures the precision/recall trade-off across the
+threshold range and locks the calibration conclusion:
+
+  - Recall is preserved at every threshold (no expected TP is ever cleared,
+    because TP anchors at 0.80 only downgrade to needs_review above 0.80 and a
+    needs_review verdict is still not cleared).
+  - False-positive reduction is maximal on the plateau [0.0, 0.80] and collapses
+    to zero once the threshold passes the 0.80 anchor cliff.
+  - Therefore 0.60 sits safely inside the max-FP-reduction plateau, below the
+    0.80 cliff, and just below the 0.61 `other` anchor — the chosen calibration.
+"""
+
+from __future__ import annotations
+
+from security_scanner.core.evaluation import (
+    HeuristicVerifierStrategy,
+    build_corpus_candidates,
+    build_evaluation_corpus,
+    run_corpus_delta,
+)
+
+SWEEP = [0.50, 0.60, 0.70, 0.80, 0.81, 0.90]
+
+
+def _fp_reduction_at(threshold: float) -> int:
+    corpus = build_evaluation_corpus()
+    candidates = build_corpus_candidates()
+    delta = run_corpus_delta(
+        corpus, candidates, HeuristicVerifierStrategy(min_confidence=threshold)
+    )
+    assert delta.recall_preserved is True  # recall preserved across the whole sweep
+    assert delta.after.false_negative_count == 0
+    return delta.false_positive_reduction
+
+
+def test_sweep_recall_preserved_and_plateau_then_cliff():
+    reductions = {thr: _fp_reduction_at(thr) for thr in SWEEP}
+
+    # Plateau: every threshold up to the 0.80 anchor clears the same 8 FPs.
+    for thr in (0.50, 0.60, 0.70, 0.80):
+        assert reductions[thr] == 8
+    # Cliff: above 0.80 the anchor confidence no longer clears anything.
+    for thr in (0.81, 0.90):
+        assert reductions[thr] == 0
+
+
+def test_fp_reduction_is_monotonic_non_increasing():
+    ordered = sorted(SWEEP)
+    reductions = [_fp_reduction_at(thr) for thr in ordered]
+    assert reductions == sorted(reductions, reverse=True)
+    # Load-bearing: a real cliff must exist (not a flat/all-zero sequence that
+    # would also satisfy the non-increasing check above).
+    assert reductions[0] > reductions[-1]
+
+
+def test_default_threshold_is_inside_the_optimal_plateau():
+    # 0.60 is the configured default (verifier.py); it must yield max reduction.
+    plateau_max = max(_fp_reduction_at(thr) for thr in SWEEP)
+    assert _fp_reduction_at(0.60) == plateau_max
diff --git a/tests/test_verifier_harness.py b/tests/test_verifier_harness.py
new file mode 100644
index 0000000..0a89932
--- /dev/null
+++ b/tests/test_verifier_harness.py
@@ -0,0 +1,145 @@
+"""Infra-free verifier accuracy harness tests (FR1, FR3, FR8).
+
+Locks the deterministic before/after baseline so any anchor/threshold change is
+measured, not asserted by inspection. No live model or network is used.
+"""
+
+from __future__ import annotations
+
+import json
+
+import pytest
+
+from security_scanner.core.evaluation import (
+    EvaluationThresholds,
+    HeuristicVerifierStrategy,
+    RecordedVerifierStrategy,
+    build_corpus_candidates,
+    build_evaluation_corpus,
+    build_ideal_responses,
+    render_verifier_delta_report,
+    run_corpus_delta,
+)
+from security_scanner.core.evaluation.verifier_corpus import (
+    CORPUS_CASES,
+    FP,
+    TP,
+    default_harness_dir,
+    write_corpus,
+)
+
+
+def test_corpus_is_large_enough_and_role_diverse():
+    corpus = build_evaluation_corpus()
+    candidates = build_corpus_candidates()
+
+    assert len(candidates) >= 20
+    assert len(corpus.expected_findings) == sum(
+        1 for c in CORPUS_CASES if c.label == TP
+    )
+    assert corpus.known_negative_count == sum(1 for c in CORPUS_CASES if c.label == FP)
+    # All six path-roles are represented.
+    assert {c.expected_role for c in CORPUS_CASES} == {
+        "documentation",
+        "example",
+        "test",
+        "configuration",
+        "source",
+        "other",
+    }
+
+
+def test_heuristic_baseline_preserves_recall_and_reduces_false_positives():
+    """FR3 gate: the path-role anchor must reduce FPs without losing recall."""
+    corpus = build_evaluation_corpus()
+    candidates = build_corpus_candidates()
+
+    delta = run_corpus_delta(
+        corpus, candidates, HeuristicVerifierStrategy(min_confidence=0.60)
+    )
+
+    # Meaningful invariants (I2/I5 spirit): no expected TP is ever cleared.
+    assert delta.recall_preserved is True
+    assert delta.after.false_negative_count == 0
+    assert delta.false_positive_reduction == 8
+    # Locked baseline so a future anchor change is detected.
+    assert delta.before.precision == pytest.approx(8 / 22)
+    assert delta.after.precision == pytest.approx(8 / 14)
+    assert delta.after.recall == pytest.approx(1.0)
+
+
+def test_recorded_ideal_baseline_passes_strict_gate():
+    """FR8: a perfect recorded model clears every FP and keeps every TP."""
+    corpus = build_evaluation_corpus()
+    candidates = build_corpus_candidates()
+    responses = build_ideal_responses(candidates)["responses"]
+
+    delta = run_corpus_delta(
+        corpus,
+        candidates,
+        RecordedVerifierStrategy(responses, min_confidence=0.60),
+        EvaluationThresholds(false_negative_max=0, precision_min=0.90, recall_min=0.99),
+    )
+
+    assert delta.gate.passed is True
+    assert delta.after.precision == pytest.approx(1.0)
+    assert delta.false_positive_reduction == 14
+
+
+def test_recorded_strategy_fails_closed_for_unrecorded_finding():
+    candidates = build_corpus_candidates()
+    strategy = RecordedVerifierStrategy({}, min_confidence=0.60)
+
+    result = strategy.verify(candidates[0])
+
+    assert result.verdict == "NEEDS_REVIEW"
+    assert result.confidence == 0.0
+
+
+def test_recorded_strategy_from_file_roundtrip(tmp_path):
+    paths = write_corpus(tmp_path)
+    strategy = RecordedVerifierStrategy.from_file(
+        paths["recorded"], min_confidence=0.60
+    )
+    corpus = build_evaluation_corpus()
+    candidates = build_corpus_candidates()
+
+    delta = run_corpus_delta(corpus, candidates, strategy)
+
+    assert delta.after.precision == pytest.approx(1.0)
+
+
+def test_committed_corpus_artifacts_are_fresh(tmp_path):
+    """Regenerating from CORPUS_CASES must match the committed artifacts."""
+    regenerated = write_corpus(tmp_path)
+    committed_dir = default_harness_dir()
+
+    for regen_path in regenerated.values():  # keys unused; files pair up by name
+        committed = (committed_dir / regen_path.name).read_text(encoding="utf-8")
+        assert committed == regen_path.read_text(encoding="utf-8"), (
+            f"{regen_path.name} is stale; run "
+            "`python -m security_scanner.core.evaluation.verifier_corpus`"
+        )
+
+
+def test_corpus_and_report_are_public_safe():
+    """I3/I6: no raw secret leaks into corpus artifacts or the rendered report."""
+    candidates = build_corpus_candidates()
+    delta = run_corpus_delta(
+        build_evaluation_corpus(), candidates, HeuristicVerifierStrategy()
+    )
+    report = render_verifier_delta_report(delta)
+
+    committed_dir = default_harness_dir()
+    blobs = [report]
+    for name in ("candidates.jsonl", "expected.json", "recorded-ideal.json"):
+        blobs.append((committed_dir / name).read_text(encoding="utf-8"))
+
+    for case in CORPUS_CASES:
+        for blob in blobs:
+            assert case.raw_secret not in blob
+    # Only salted hashes ever represent secrets in the candidate store.
+    candidates_text = (committed_dir / "candidates.jsonl").read_text(encoding="utf-8")
+    assert "SCANNER_FAKE_SECRET_TOKEN_" not in candidates_text
+    for line in candidates_text.splitlines():
+        assert json.loads(line)["evidence"]["secretHash"].startswith("salted-sha256:")

From e2f2c110f349de9ee587dbeaba2ad3f614d9f1c7 Mon Sep 17 00:00:00 2001
From: pureliture <tkdgur1756@naver.com>
Date: Sat, 20 Jun 2026 08:34:26 +0900
Subject: [PATCH 2/2] refactor(verifier): address gemini-code-assist review
 comments (#45)

- RecordedVerifierStrategy.from_file: fail-closed ValueError on non-mapping JSON
  (avoids later AttributeError) + parametrized type-validation test
- VerifierStrategy Protocol typing for verify_candidates/run_corpus_delta
- build_ideal_responses: single build_corpus_candidates() call (dedup)

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .../core/evaluation/verifier_corpus.py        | 11 ++++-------
 .../core/evaluation/verifier_harness.py       | 19 ++++++++++++++++---
 tests/test_verifier_harness.py                | 12 ++++++++++++
 3 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/src/security_scanner/core/evaluation/verifier_corpus.py b/src/security_scanner/core/evaluation/verifier_corpus.py
index e58f083..e02330a 100644
--- a/src/security_scanner/core/evaluation/verifier_corpus.py
+++ b/src/security_scanner/core/evaluation/verifier_corpus.py
@@ -285,16 +285,13 @@ def build_ideal_responses(candidates: list[Finding] | None = None) -> dict:
     TP cases -> true_positive; FP cases -> false_positive. Confidence 0.95 so it
     clears any reasonable min_confidence threshold.
     """
+    # zip() pairs positionally: caller-supplied candidates must follow
+    # CORPUS_CASES order (as build_corpus_candidates() does; ids are deterministic).
     candidates = candidates or build_corpus_candidates()
-    by_id = {finding.finding_id: finding for finding in candidates}
     responses: dict[str, dict] = {}
-    for case, finding in zip(CORPUS_CASES, build_corpus_candidates()):
-        # finding ids are deterministic; align case label to its finding.
-        fid = finding.finding_id
-        if fid not in by_id:
-            continue
+    for case, finding in zip(CORPUS_CASES, candidates):
         label = "true_positive" if case.label == TP else "false_positive"
-        responses[fid] = {
+        responses[finding.finding_id] = {
             "label": label,
             "confidence": 0.95,
             "reason": case.reason or f"Synthetic {case.label} case.",
diff --git a/src/security_scanner/core/evaluation/verifier_harness.py b/src/security_scanner/core/evaluation/verifier_harness.py
index d3d50c5..29edad4 100644
--- a/src/security_scanner/core/evaluation/verifier_harness.py
+++ b/src/security_scanner/core/evaluation/verifier_harness.py
@@ -22,6 +22,7 @@
 from collections.abc import Mapping
 from dataclasses import dataclass
 from pathlib import Path
+from typing import Protocol
 
 from security_scanner.core.evaluation.metrics import (
     EvaluationCorpus,
@@ -38,6 +39,12 @@
 )
 
 
+class VerifierStrategy(Protocol):
+    """Structural type for a candidate-to-result verifier strategy."""
+
+    def verify(self, finding: Finding) -> VerifierResult: ...
+
+
 @dataclass(frozen=True)
 class HeuristicVerifierStrategy:
     """Deterministic path-role anchor strategy (no model, no network)."""
@@ -78,11 +85,17 @@ def from_file(
         cls, path: str | Path, *, min_confidence: float = 0.60
     ) -> "RecordedVerifierStrategy":
         data = json.loads(Path(path).read_text(encoding="utf-8"))
-        responses = data.get("responses", data) if isinstance(data, Mapping) else data
+        if not isinstance(data, Mapping):
+            raise ValueError("Recorded responses file must contain a JSON object.")
+        responses = data.get("responses", data)
+        if not isinstance(responses, Mapping):
+            raise ValueError("Recorded responses must be a mapping.")
         return cls(responses=responses, min_confidence=min_confidence)
 
 
-def verify_candidates(candidates: list[Finding], strategy) -> list[Finding]:
+def verify_candidates(
+    candidates: list[Finding], strategy: VerifierStrategy
+) -> list[Finding]:
     """Apply *strategy* to each candidate, returning verified (triaged) findings."""
     return [
         apply_verifier_result(finding, strategy.verify(finding))
@@ -93,7 +106,7 @@ def verify_candidates(candidates: list[Finding], strategy) -> list[Finding]:
 def run_corpus_delta(
     corpus: EvaluationCorpus,
     candidates: list[Finding],
-    strategy,
+    strategy: VerifierStrategy,
     thresholds: EvaluationThresholds | None = None,
 ) -> VerifierDeltaResult:
     """Measure before/after verifier accuracy on a corpus (reuses metrics.py)."""
diff --git a/tests/test_verifier_harness.py b/tests/test_verifier_harness.py
index 0a89932..3c5ac39 100644
--- a/tests/test_verifier_harness.py
+++ b/tests/test_verifier_harness.py
@@ -143,3 +143,15 @@ def test_corpus_and_report_are_public_safe():
     assert "SCANNER_FAKE_SECRET_TOKEN_" not in candidates_text
     for line in candidates_text.splitlines():
         assert json.loads(line)["evidence"]["secretHash"].startswith("salted-sha256:")
+
+
+@pytest.mark.parametrize(
+    "payload",
+    ["[]", "42", "1.5", "true", "null", '"a string"', '{"responses": [1, 2]}'],
+)
+def test_recorded_from_file_rejects_non_mapping(tmp_path, payload):
+    """Reject non-object JSON and non-mapping `responses` (no late AttributeError)."""
+    path = tmp_path / "bad.json"
+    path.write_text(payload, encoding="utf-8")
+    with pytest.raises(ValueError):
+        RecordedVerifierStrategy.from_file(path)