From ad11443a8b55fe6816d125f798a8e5bc68d3fdc0 Mon Sep 17 00:00:00 2001 From: pureliture Date: Sun, 21 Jun 2026 09:10:57 +0900 Subject: [PATCH] =?UTF-8?q?fix(worker):=20baseline=20job=EC=9D=84=20full-h?= =?UTF-8?q?istory=20scan=EC=9C=BC=EB=A1=9C=20=EC=B2=98=EB=A6=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit baseline enqueue의 sentinel commit을 단일 commit log opts로 넘기지 않도록 scan-worker 옵션을 분리한다. baseline job은 finding context에 sentinel commit/HEAD branch를 덮어쓰지 않고, scanner가 제공한 history context를 보존한다. 검증: uv run pytest tests/test_scan_worker.py -q && uv run pytest -q && uv run python -m governance.autopilot_gate --base origin/main Co-Authored-By: Codex GPT-5 --- src/security_scanner/runtime/scan_worker.py | 27 ++++++++++++---- tests/test_scan_worker.py | 34 +++++++++++++++++++-- 2 files changed, 52 insertions(+), 9 deletions(-) diff --git a/src/security_scanner/runtime/scan_worker.py b/src/security_scanner/runtime/scan_worker.py index 979bcd4..527b94b 100644 --- a/src/security_scanner/runtime/scan_worker.py +++ b/src/security_scanner/runtime/scan_worker.py @@ -19,6 +19,7 @@ from security_scanner.scanners.gitleaks.scanner import GitleaksScanner from security_scanner.storage.base import ( IncrementalScanStore, + JOB_TYPE_BASELINE, ScanJob, ScanLedgerEntry, ) @@ -121,16 +122,13 @@ def run_scan_worker_once(request: ScanWorkerRequest) -> ScanWorkerSummary: findings = request.scanner.scan( repo_full_name=job.repo_id, root=repo_path, - scan_options=ScanOptions( - include_history=True, - git_log_opts=f"{job.commit_sha}^!", - ), + scan_options=_scan_options_for_job(job), scan_run_id=scan_run_id, rule_pack_version=job.rule_pack_version, ) - branch = branch_from_ref(job.ref_name) + commit, branch = _finding_context_for_job(job) findings = [ - finding_with_context(finding, commit=job.commit_sha, branch=branch) + finding_with_context(finding, commit=commit, branch=branch) for finding in findings ] scanned_at = _now(request) @@ -234,6 +232,23 @@ def make_default_scanner() -> GitleaksScanner: return GitleaksScanner() +def _scan_options_for_job(job: ScanJob) -> ScanOptions: + """Return gitleaks scan options for one queued job.""" + if job.job_type == JOB_TYPE_BASELINE: + return ScanOptions(include_history=True) + return ScanOptions( + include_history=True, + git_log_opts=f"{job.commit_sha}^!", + ) + + +def _finding_context_for_job(job: ScanJob) -> tuple[str | None, str | None]: + """Return occurrence context to stamp onto scanner findings.""" + if job.job_type == JOB_TYPE_BASELINE: + return None, None + return job.commit_sha, branch_from_ref(job.ref_name) + + def _advance_repo_health( request: ScanWorkerRequest, job: ScanJob, *, completed_at: dt.datetime ) -> None: diff --git a/tests/test_scan_worker.py b/tests/test_scan_worker.py index 0677c84..6ed66e1 100644 --- a/tests/test_scan_worker.py +++ b/tests/test_scan_worker.py @@ -12,7 +12,12 @@ run_scan_worker, run_scan_worker_once, ) -from security_scanner.storage.base import ScanJob, ScanLedgerEntry, ScanLedgerKey +from security_scanner.storage.base import ( + JOB_TYPE_BASELINE, + ScanJob, + ScanLedgerEntry, + ScanLedgerKey, +) NOW = dt.datetime(2026, 6, 12, 12, 0, tzinfo=dt.UTC) REPO_ID = "repo_synthetic000000000001" @@ -472,7 +477,16 @@ def test_worker_threads_leased_job_fence_into_retryable_failure(): def _baseline_job() -> ScanJob: job = _job() - return ScanJob(**{**job.__dict__, "job_type": "baseline"}) + return ScanJob( + **{ + **job.__dict__, + "job_type": JOB_TYPE_BASELINE, + "commit_sha": "baseline", + "new_sha": "baseline", + "commit_range": None, + "ref_name": "refs/remotes/origin/HEAD", + } + ) def test_completed_incremental_job_advances_incremental_repo_health(): @@ -492,7 +506,21 @@ def test_completed_baseline_job_advances_baseline_repo_health(): run_scan_worker_once(_request(store, scanner)) - assert store.health_advances == [(REPO_ID, "baseline", NOW)] + assert store.health_advances == [(REPO_ID, JOB_TYPE_BASELINE, NOW)] + + +def test_baseline_job_runs_full_history_without_sentinel_context(): + store = FakeWorkerStore([_baseline_job()]) + scanner = FakeScanner(findings=[_finding(commit=None)]) + + run_scan_worker_once(_request(store, scanner)) + + call = scanner.calls[0] + assert call["scan_options"] == ScanOptions(include_history=True, git_log_opts=None) + _, findings, ledger = store.completed[0] + assert findings[0].repo.commit is None + assert findings[0].repo.branch is None + assert ledger.commit_sha == "baseline" def test_ledger_already_present_completion_still_advances_repo_health():