diff --git a/src/security_scanner/runtime/scan_worker.py b/src/security_scanner/runtime/scan_worker.py index 979bcd4..527b94b 100644 --- a/src/security_scanner/runtime/scan_worker.py +++ b/src/security_scanner/runtime/scan_worker.py @@ -19,6 +19,7 @@ from security_scanner.scanners.gitleaks.scanner import GitleaksScanner from security_scanner.storage.base import ( IncrementalScanStore, + JOB_TYPE_BASELINE, ScanJob, ScanLedgerEntry, ) @@ -121,16 +122,13 @@ def run_scan_worker_once(request: ScanWorkerRequest) -> ScanWorkerSummary: findings = request.scanner.scan( repo_full_name=job.repo_id, root=repo_path, - scan_options=ScanOptions( - include_history=True, - git_log_opts=f"{job.commit_sha}^!", - ), + scan_options=_scan_options_for_job(job), scan_run_id=scan_run_id, rule_pack_version=job.rule_pack_version, ) - branch = branch_from_ref(job.ref_name) + commit, branch = _finding_context_for_job(job) findings = [ - finding_with_context(finding, commit=job.commit_sha, branch=branch) + finding_with_context(finding, commit=commit, branch=branch) for finding in findings ] scanned_at = _now(request) @@ -234,6 +232,23 @@ def make_default_scanner() -> GitleaksScanner: return GitleaksScanner() +def _scan_options_for_job(job: ScanJob) -> ScanOptions: + """Return gitleaks scan options for one queued job.""" + if job.job_type == JOB_TYPE_BASELINE: + return ScanOptions(include_history=True) + return ScanOptions( + include_history=True, + git_log_opts=f"{job.commit_sha}^!", + ) + + +def _finding_context_for_job(job: ScanJob) -> tuple[str | None, str | None]: + """Return occurrence context to stamp onto scanner findings.""" + if job.job_type == JOB_TYPE_BASELINE: + return None, None + return job.commit_sha, branch_from_ref(job.ref_name) + + def _advance_repo_health( request: ScanWorkerRequest, job: ScanJob, *, completed_at: dt.datetime ) -> None: diff --git a/tests/test_scan_worker.py b/tests/test_scan_worker.py index 0677c84..6ed66e1 100644 --- a/tests/test_scan_worker.py +++ b/tests/test_scan_worker.py @@ -12,7 +12,12 @@ run_scan_worker, run_scan_worker_once, ) -from security_scanner.storage.base import ScanJob, ScanLedgerEntry, ScanLedgerKey +from security_scanner.storage.base import ( + JOB_TYPE_BASELINE, + ScanJob, + ScanLedgerEntry, + ScanLedgerKey, +) NOW = dt.datetime(2026, 6, 12, 12, 0, tzinfo=dt.UTC) REPO_ID = "repo_synthetic000000000001" @@ -472,7 +477,16 @@ def test_worker_threads_leased_job_fence_into_retryable_failure(): def _baseline_job() -> ScanJob: job = _job() - return ScanJob(**{**job.__dict__, "job_type": "baseline"}) + return ScanJob( + **{ + **job.__dict__, + "job_type": JOB_TYPE_BASELINE, + "commit_sha": "baseline", + "new_sha": "baseline", + "commit_range": None, + "ref_name": "refs/remotes/origin/HEAD", + } + ) def test_completed_incremental_job_advances_incremental_repo_health(): @@ -492,7 +506,21 @@ def test_completed_baseline_job_advances_baseline_repo_health(): run_scan_worker_once(_request(store, scanner)) - assert store.health_advances == [(REPO_ID, "baseline", NOW)] + assert store.health_advances == [(REPO_ID, JOB_TYPE_BASELINE, NOW)] + + +def test_baseline_job_runs_full_history_without_sentinel_context(): + store = FakeWorkerStore([_baseline_job()]) + scanner = FakeScanner(findings=[_finding(commit=None)]) + + run_scan_worker_once(_request(store, scanner)) + + call = scanner.calls[0] + assert call["scan_options"] == ScanOptions(include_history=True, git_log_opts=None) + _, findings, ledger = store.completed[0] + assert findings[0].repo.commit is None + assert findings[0].repo.branch is None + assert ledger.commit_sha == "baseline" def test_ledger_already_present_completion_still_advances_repo_health():