diff --git a/src/security_scanner/runtime/incremental_discovery.py b/src/security_scanner/runtime/incremental_discovery.py index dcc8ed2..340634b 100644 --- a/src/security_scanner/runtime/incremental_discovery.py +++ b/src/security_scanner/runtime/incremental_discovery.py @@ -5,9 +5,10 @@ import datetime as dt import fnmatch import subprocess +from collections.abc import Callable, Sequence from dataclasses import dataclass, field from pathlib import Path -from typing import Callable, Protocol, Sequence +from typing import Protocol from security_scanner.catalog.scan_target import ScanTarget from security_scanner.runtime.poll_fetch import ( @@ -33,7 +34,6 @@ ScanLedgerKey, ) - DISCOVERY_MODE_INITIALIZE = "initialize" DISCOVERY_MODE_ENQUEUE = "enqueue" DEFAULT_REF_PATTERNS = ("refs/remotes/origin/*",) @@ -115,7 +115,15 @@ def list_remote_refs( return refs def is_ancestor(self, repo_path: Path, old_sha: str, new_sha: str) -> bool: - cmd = ["git", "-C", str(repo_path), "merge-base", "--is-ancestor", old_sha, new_sha] + cmd = [ + "git", + "-C", + str(repo_path), + "merge-base", + "--is-ancestor", + old_sha, + new_sha, + ] try: subprocess.run(cmd, check=True, capture_output=True, text=True) return True @@ -489,6 +497,15 @@ def run_incremental_discovery( fetch_ok += 1 refs_observed += len(refs) + if not refs: + advance = getattr(request.store, "advance_repo_health", None) + if advance is not None: + advance( + repo_id, + job_type=JOB_TYPE_INCREMENTAL, + completed_at=_now(request), + ) + continue for git_ref in refs: current_state = request.store.get_ref_state(repo_id, git_ref.ref_name) diff --git a/tests/test_incremental_discovery.py b/tests/test_incremental_discovery.py index 3924bad..80a0119 100644 --- a/tests/test_incremental_discovery.py +++ b/tests/test_incremental_discovery.py @@ -17,7 +17,6 @@ from security_scanner.storage.adapters.nosql_db.items import repo_id_for_scan_target_url from security_scanner.storage.base import RefState, ScanJob, ScanLedgerKey - NOW = dt.datetime(2026, 6, 12, 10, 0, tzinfo=dt.UTC) TARGET = ScanTarget( url="https://github.com/example-org/example-repo", @@ -36,6 +35,7 @@ def __init__(self, targets: list[ScanTarget]) -> None: self.ref_states: dict[tuple[str, str], RefState] = {} self.jobs: dict[str, ScanJob] = {} self.ledger: set[ScanLedgerKey] = set() + self.health_advances: list[tuple[str, str, dt.datetime]] = [] def list_scan_targets(self) -> list[ScanTarget]: return list(self.targets) @@ -62,6 +62,9 @@ def enqueue_commit_scan_job(self, job: ScanJob) -> bool: self.jobs[job.job_id] = job return True + def advance_repo_health(self, repo_id: str, *, job_type: str, completed_at) -> None: + self.health_advances.append((repo_id, job_type, completed_at)) + class FakeGitDiscovery: def __init__(self) -> None: @@ -182,7 +185,9 @@ def test_enqueue_creates_one_job_per_new_unscanned_commit_and_advances_ref_state assert summary.jobs_enqueued == 2 assert {job.commit_sha for job in store.jobs.values()} == {MID_SHA, NEW_SHA} - assert {job.commit_range for job in store.jobs.values()} == {f"{OLD_SHA}..{NEW_SHA}"} + assert {job.commit_range for job in store.jobs.values()} == { + f"{OLD_SHA}..{NEW_SHA}" + } assert store.ref_states[(repo_id, REF_MAIN)].last_seen_sha == NEW_SHA @@ -207,6 +212,29 @@ def test_enqueue_missing_ref_state_observes_without_backfill(): assert store.ref_states[(repo_id, REF_MAIN)].last_seen_sha == NEW_SHA +def test_enqueue_no_refs_marks_repo_incrementally_fresh_without_job(): + repo_path = Path("/synthetic-cache/example-repo") + repo_id = repo_id_for_scan_target_url(TARGET.url) + store = FakeIncrementalStore([TARGET]) + git = FakeGitDiscovery() + git.refs_by_path[repo_path] = [] + + summary = run_incremental_discovery( + _request( + mode=DISCOVERY_MODE_ENQUEUE, + store=store, + git=git, + fetch_repo=lambda url: repo_path, + ) + ) + + assert summary.fetch_ok == 1 + assert summary.refs_observed == 0 + assert summary.jobs_enqueued == 0 + assert store.jobs == {} + assert store.health_advances == [(repo_id, "incremental", NOW)] + + def test_enqueue_skips_commits_present_in_ledger(): repo_path = Path("/synthetic-cache/example-repo") repo_id = repo_id_for_scan_target_url(TARGET.url)