Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 20 additions & 3 deletions src/security_scanner/runtime/incremental_discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
import datetime as dt
import fnmatch
import subprocess
from collections.abc import Callable, Sequence
from dataclasses import dataclass, field
from pathlib import Path
from typing import Callable, Protocol, Sequence
from typing import Protocol

from security_scanner.catalog.scan_target import ScanTarget
from security_scanner.runtime.poll_fetch import (
Expand All @@ -33,7 +34,6 @@
ScanLedgerKey,
)


# String identifiers for the two discovery modes.
# NOTE(review): exact semantics of each mode are defined by the discovery
# runner, which is not fully visible here — confirm before relying on them.
DISCOVERY_MODE_INITIALIZE = "initialize"
DISCOVERY_MODE_ENQUEUE = "enqueue"
# Default ref-name patterns (remote-tracking refs under origin).
# NOTE(review): presumably glob patterns matched via fnmatch — confirm.
DEFAULT_REF_PATTERNS = ("refs/remotes/origin/*",)
Expand Down Expand Up @@ -115,7 +115,15 @@ def list_remote_refs(
return refs

def is_ancestor(self, repo_path: Path, old_sha: str, new_sha: str) -> bool:
cmd = ["git", "-C", str(repo_path), "merge-base", "--is-ancestor", old_sha, new_sha]
cmd = [
"git",
"-C",
str(repo_path),
"merge-base",
"--is-ancestor",
old_sha,
new_sha,
]
try:
subprocess.run(cmd, check=True, capture_output=True, text=True)
return True
Expand Down Expand Up @@ -489,6 +497,15 @@ def run_incremental_discovery(

fetch_ok += 1
refs_observed += len(refs)
if not refs:
advance = getattr(request.store, "advance_repo_health", None)
if advance is not None:
advance(
repo_id,
job_type=JOB_TYPE_INCREMENTAL,
completed_at=_now(request),
)
continue
Comment thread
pureliture marked this conversation as resolved.

for git_ref in refs:
current_state = request.store.get_ref_state(repo_id, git_ref.ref_name)
Expand Down
32 changes: 30 additions & 2 deletions tests/test_incremental_discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from security_scanner.storage.adapters.nosql_db.items import repo_id_for_scan_target_url
from security_scanner.storage.base import RefState, ScanJob, ScanLedgerKey


NOW = dt.datetime(2026, 6, 12, 10, 0, tzinfo=dt.UTC)
TARGET = ScanTarget(
url="https://github.com/example-org/example-repo",
Expand All @@ -36,6 +35,7 @@ def __init__(self, targets: list[ScanTarget]) -> None:
self.ref_states: dict[tuple[str, str], RefState] = {}
self.jobs: dict[str, ScanJob] = {}
self.ledger: set[ScanLedgerKey] = set()
self.health_advances: list[tuple[str, str, dt.datetime]] = []

def list_scan_targets(self) -> list[ScanTarget]:
    """Return the configured scan targets as a newly built list.

    A fresh list is created on every call, so a caller that mutates the
    result does not alter ``self.targets``.
    """
    return [*self.targets]
Expand All @@ -62,6 +62,9 @@ def enqueue_commit_scan_job(self, job: ScanJob) -> bool:
self.jobs[job.job_id] = job
return True

def advance_repo_health(
    self,
    repo_id: str,
    *,
    job_type: str,
    completed_at: dt.datetime,
) -> None:
    """Record a repo-health advance so tests can assert on it afterwards.

    Args:
        repo_id: Identifier of the repository whose health is advanced.
        job_type: Job-type label supplied by the caller (keyword-only).
        completed_at: Completion timestamp supplied by the caller
            (keyword-only).
    """
    # Each call is kept as a (repo_id, job_type, completed_at) tuple, in
    # call order, matching the declared element type of health_advances.
    self.health_advances.append((repo_id, job_type, completed_at))


class FakeGitDiscovery:
def __init__(self) -> None:
Expand Down Expand Up @@ -182,7 +185,9 @@ def test_enqueue_creates_one_job_per_new_unscanned_commit_and_advances_ref_state

assert summary.jobs_enqueued == 2
assert {job.commit_sha for job in store.jobs.values()} == {MID_SHA, NEW_SHA}
assert {job.commit_range for job in store.jobs.values()} == {f"{OLD_SHA}..{NEW_SHA}"}
assert {job.commit_range for job in store.jobs.values()} == {
f"{OLD_SHA}..{NEW_SHA}"
}
assert store.ref_states[(repo_id, REF_MAIN)].last_seen_sha == NEW_SHA


Expand All @@ -207,6 +212,29 @@ def test_enqueue_missing_ref_state_observes_without_backfill():
assert store.ref_states[(repo_id, REF_MAIN)].last_seen_sha == NEW_SHA


def test_enqueue_no_refs_marks_repo_incrementally_fresh_without_job():
    """Enqueue mode with zero refs records one health advance and no jobs.

    The fake git layer reports an empty ref list for the fetched repo path.
    The run must still count the fetch as successful, observe no refs,
    enqueue nothing, and record exactly one incremental health advance
    stamped with ``NOW``.
    """
    cache_path = Path("/synthetic-cache/example-repo")
    expected_repo_id = repo_id_for_scan_target_url(TARGET.url)
    fake_store = FakeIncrementalStore([TARGET])
    fake_git = FakeGitDiscovery()
    # The repository fetches fine but exposes no refs at all.
    fake_git.refs_by_path[cache_path] = []

    discovery_request = _request(
        mode=DISCOVERY_MODE_ENQUEUE,
        store=fake_store,
        git=fake_git,
        fetch_repo=lambda url: cache_path,
    )
    summary = run_incremental_discovery(discovery_request)

    # Fetch succeeded, but there was nothing to observe or enqueue.
    assert summary.fetch_ok == 1
    assert summary.refs_observed == 0
    assert summary.jobs_enqueued == 0
    assert fake_store.jobs == {}
    # The empty-ref path must still advance the repo's health exactly once.
    assert fake_store.health_advances == [(expected_repo_id, "incremental", NOW)]


def test_enqueue_skips_commits_present_in_ledger():
repo_path = Path("/synthetic-cache/example-repo")
repo_id = repo_id_for_scan_target_url(TARGET.url)
Expand Down
Loading