From 15e49d7c397d0d7ccb4845b9984d8f8b79ed98a2 Mon Sep 17 00:00:00 2001
From: Dominique Broeglin <dominique.broeglin@microsoft.com>
Date: Sat, 27 Jun 2026 12:02:27 +0200
Subject: [PATCH] Fail fast on unavailable Pier backend

Add a Pier backend preflight so Docker setup failures stop before empty jobs are recorded. Improve analyze diagnostics for Pier trials that fail before Copilot logs are captured.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 README.md                               |  4 +-
 docs/authoring-experiments.md           |  4 ++
 docs/results-format.md                  |  3 +-
 src/copilot_experiments/cli.py          | 37 +++++++++++
 src/copilot_experiments/pier_backend.py | 62 ++++++++++++++++++
 src/copilot_experiments/pier_results.py | 33 ++++++++++
 tests/test_pier_backend.py              | 83 +++++++++++++++++++++++++
 tests/test_pier_results.py              | 62 ++++++++++++++++++
 8 files changed, 286 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index ff7a27e..1e440d9 100644
--- a/README.md
+++ b/README.md
@@ -70,7 +70,9 @@ current local checkout into uv's cache. If you are iterating on the tool and nee
 rebuild from the working tree, add `--no-cache` before `--from`.
 
 Real runs require Copilot auth (`COPILOT_GITHUB_TOKEN`, `GH_TOKEN`, `GITHUB_TOKEN`, or `gh auth
-login`) and a Pier-supported execution backend such as Docker.
+login`) and a Pier-supported execution backend such as Docker. `run` preflights the selected
+backend before creating a job; for Docker this checks the CLI, Compose plugin, and daemon
+connection so missing WSL integration fails before an empty Pier job is recorded.
 
 Each `run` is a new measurement. If the configured Pier `job_name` already exists under `jobs/`,
 `copilot-experiments` writes the rerun to a timestamped job name instead of silently reusing the
diff --git a/docs/authoring-experiments.md b/docs/authoring-experiments.md
index 8d12433..24b6cb8 100644
--- a/docs/authoring-experiments.md
+++ b/docs/authoring-experiments.md
@@ -165,6 +165,10 @@ rebuild from the working tree, add `--no-cache` before `--from`.
 Python experiment path still has an ephemeral mock dry-run, but Pier is the primary authoring
 model.
 
+`run` performs a lightweight backend preflight before Pier creates a job. For the default Docker
+backend it verifies that `docker`, `docker compose`, and the Docker daemon are reachable; this catches
+common WSL/Docker Desktop integration issues before a trial can fail without Copilot logs.
+
 Pier itself resumes existing matching job directories and skips trials that already have
 `result.json`. `copilot-experiments run` treats a plain rerun as a fresh measurement instead: when
 `jobs/<job_name>/` already exists, it appends a timestamp to the Pier job name for the new run. Pass
diff --git a/docs/results-format.md b/docs/results-format.md
index 4a3eced..e81e98a 100644
--- a/docs/results-format.md
+++ b/docs/results-format.md
@@ -89,4 +89,5 @@ uv run copilot-experiments analyze --file jobs/<job>/<trial>/agent/copilot-sessi
 
 If the selected Pier trial has no native Copilot `events.jsonl`, `analyze` falls back to
 `agent/trajectory.json` when present; otherwise it reports that no Copilot session log or
-trajectory is available.
+trajectory is available. When Pier recorded a trial exception before the agent ran, `analyze`
+includes that harness error and points at the trial `result.json`.
diff --git a/src/copilot_experiments/cli.py b/src/copilot_experiments/cli.py
index 21d9ed0..fe858b5 100644
--- a/src/copilot_experiments/cli.py
+++ b/src/copilot_experiments/cli.py
@@ -19,12 +19,16 @@
 from .index import reindex as index_reindex
 from .models import DryRunReport, Experiment, ExperimentRun
 from .pier_backend import (
+    PierBackendPreflightError,
     discover_pier_job_configs,
     inject_copilot_token,
+    preflight_pier_backend,
     prepare_pier_job_for_run,
     run_pier_job,
 )
 from .pier_results import (
+    describe_missing_pier_analysis_source,
+    iter_pier_trial_summaries,
     resolve_pier_trial_analysis_source,
     write_pier_summary,
 )
@@ -294,6 +298,13 @@ def run(
             console.print("[green]Pier config validation OK[/green] [dim]— no job was run[/dim]")
             raise typer.Exit(0)
 
+        try:
+            for spec in pier_specs:
+                preflight_pier_backend(spec.config)
+        except PierBackendPreflightError as exc:
+            err.print(f"[red]Pier backend preflight failed:[/red] {exc}")
+            raise typer.Exit(1) from exc
+
         try:
             auth = preflight_github_token()
         except AuthError as exc:
@@ -322,6 +333,7 @@ def run(
                 continue
             summary = write_pier_summary(run_result.job_dir)
             _print_run_summary(summary)
+            _warn_failed_pier_trials(run_result.job_dir)
             if summary.get("status") != "completed":
                 any_failures = True
             console.print(f"[dim]results:[/dim] {run_result.job_dir}\n")
@@ -633,6 +645,9 @@ def analyze(
             )
             if source_path is None:
                 err.print(f"[red]No Copilot session log or trajectory found in[/red] {pier_job}")
+                diagnostic = describe_missing_pier_analysis_source(pier_job, trial)
+                if diagnostic:
+                    err.print(f"[yellow]{diagnostic}[/yellow]")
                 raise typer.Exit(1)
             selected_otel = otel_file or discovered_otel
             analysis = (
@@ -821,6 +836,28 @@ def _warn_failed_trials(layout: Layout, experiment: Experiment, run: ExperimentR
         err.print(f"[yellow]{line}[/yellow]")
 
 
+def _warn_failed_pier_trials(job_dir: Path) -> None:
+    """Point Pier harness failures at the trial result artifact."""
+
+    problems: list[str] = []
+    for trial in iter_pier_trial_summaries(job_dir):
+        if trial.get("status") == "ok":
+            continue
+        trial_name = str(trial.get("trial_name") or trial.get("trial_no") or "-")
+        problems.append(
+            f"  {trial_name}: harness failure — {trial.get('error') or trial.get('status')}\n"
+            f"      -> {job_dir / trial_name / 'result.json'}"
+        )
+    if not problems:
+        return
+    err.print(
+        f"[yellow]Warning:[/yellow] Pier job [bold]{job_dir.name}[/bold] had "
+        f"{len(problems)} harness failure(s). Inspect the captured trial result:"
+    )
+    for line in problems:
+        err.print(f"[yellow]{line}[/yellow]")
+
+
 def _inspect_pier_job(job_dir: Path) -> None:
     summary = write_pier_summary(job_dir)
     console.print(f"[bold]Pier job[/bold]: {job_dir.name}")
diff --git a/src/copilot_experiments/pier_backend.py b/src/copilot_experiments/pier_backend.py
index b264bcd..3d6afa2 100644
--- a/src/copilot_experiments/pier_backend.py
+++ b/src/copilot_experiments/pier_backend.py
@@ -4,6 +4,8 @@
 
 import asyncio
 import json
+import shutil
+import subprocess
 from dataclasses import dataclass
 from datetime import datetime
 from pathlib import Path
@@ -49,6 +51,10 @@ def renamed(self) -> bool:
         return self.requested_name != self.run_name
 
 
+class PierBackendPreflightError(RuntimeError):
+    """A Pier execution backend is not available before a job starts."""
+
+
 def discover_pier_job_configs(root: Path, name: str | None = None) -> list[PierJobSpec]:
     """Load Pier JobConfig files from ``experiments/*.yaml|*.yml|*.json``."""
 
@@ -120,6 +126,14 @@ def normalize_pier_job_config(config: Any, *, root: Path, base_dir: Path) -> Any
     return config
 
 
+def preflight_pier_backend(config: Any) -> None:
+    """Fail fast when the configured Pier backend is not usable locally."""
+
+    backend = _environment_type(config)
+    if backend == "docker":
+        _preflight_docker_backend()
+
+
 def inject_copilot_token(config: Any, token: str) -> None:
     """Inject a GitHub token into local Copilot agents without persisting it to config."""
 
@@ -189,3 +203,51 @@ def _resolve_path(path: Path, base: Path) -> Path:
 
 def _job_dir(config: Any) -> Path:
     return Path(config.jobs_dir) / str(config.job_name)
+
+
+def _environment_type(config: Any) -> str:
+    environment = getattr(config, "environment", None)
+    value = getattr(environment, "type", None)
+    return str(getattr(value, "value", value) or "").lower()
+
+
+def _preflight_docker_backend() -> None:
+    if shutil.which("docker") is None:
+        raise PierBackendPreflightError(
+            "Pier is configured to use the Docker backend, but the 'docker' CLI was not found. "
+            "Install Docker or enable Docker Desktop WSL integration, then retry."
+        )
+
+    _run_backend_probe(
+        ["docker", "compose", "version"],
+        "Docker Compose is required by Pier's Docker backend but is not available.",
+    )
+    _run_backend_probe(
+        ["docker", "info"],
+        "Docker is installed, but the daemon is not reachable.",
+    )
+
+
+def _run_backend_probe(command: list[str], failure_message: str) -> None:
+    try:
+        proc = subprocess.run(
+            command,
+            capture_output=True,
+            text=True,
+            timeout=20,
+        )
+    except OSError as exc:
+        raise PierBackendPreflightError(f"{failure_message} ({exc})") from exc
+    except subprocess.TimeoutExpired as exc:
+        raise PierBackendPreflightError(
+            f"{failure_message} Probe timed out after {exc.timeout} seconds: {' '.join(command)}"
+        ) from exc
+
+    if proc.returncode == 0:
+        return
+
+    detail = "\n".join(part for part in (proc.stderr.strip(), proc.stdout.strip()) if part)
+    suffix = f"\n{detail}" if detail else ""
+    raise PierBackendPreflightError(
+        f"{failure_message} Command failed: {' '.join(command)}{suffix}"
+    )
diff --git a/src/copilot_experiments/pier_results.py b/src/copilot_experiments/pier_results.py
index 6bc4c58..6ddca31 100644
--- a/src/copilot_experiments/pier_results.py
+++ b/src/copilot_experiments/pier_results.py
@@ -139,6 +139,39 @@ def resolve_pier_trial_analysis_source(
     return None, label, None, None
 
 
+def describe_missing_pier_analysis_source(
+    job_dir: Path, trial: int | str | None = None
+) -> str | None:
+    """Explain why a Pier trial has no Copilot-native analysis source."""
+
+    trial_dir = _resolve_trial_dir(job_dir, trial)
+    if trial_dir is None:
+        return "No Pier trial directories with result.json were found."
+
+    result_path = trial_dir / "result.json"
+    if not result_path.exists():
+        return f"Selected Pier trial has no result.json: {result_path}"
+
+    trial_result = read_json(result_path)
+    exception = trial_result.get("exception_info") or {}
+    if exception:
+        message = (
+            exception.get("exception_message")
+            or exception.get("message")
+            or exception.get("exception_type")
+            or str(exception)
+        )
+        return (
+            "Selected Pier trial failed before a Copilot session was captured: "
+            f"{message}\nInspect: {result_path}"
+        )
+
+    return (
+        "Selected Pier trial completed without a native Copilot session log or ATIF trajectory.\n"
+        f"Inspect: {trial_dir / 'agent'}"
+    )
+
+
 def _resolve_trial_dir(job_dir: Path, trial: int | str | None = None) -> Path | None:
     trial_dirs = iter_trial_dirs(job_dir)
     if not trial_dirs:
diff --git a/tests/test_pier_backend.py b/tests/test_pier_backend.py
index 95542ba..23ad902 100644
--- a/tests/test_pier_backend.py
+++ b/tests/test_pier_backend.py
@@ -4,14 +4,19 @@
 
 from datetime import datetime
 from pathlib import Path
+from subprocess import CompletedProcess
 
 import pytest
+from typer.testing import CliRunner
 
+from copilot_experiments.cli import app
 from copilot_experiments.pier_backend import (
     COPILOT_CLI_AGENT_IMPORT_PATH,
+    PierBackendPreflightError,
     discover_pier_job_configs,
     inject_copilot_token,
     load_pier_job_config,
+    preflight_pier_backend,
     prepare_pier_job_for_run,
 )
 
@@ -119,6 +124,84 @@ def test_prepare_pier_job_for_run_resume_keeps_existing_name(tmp_path: Path):
     assert not prepared.renamed
 
 
+def test_preflight_pier_backend_reports_missing_docker(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+):
+    config_path = tmp_path / "job.yaml"
+    config_path.write_text("job_name: smoke\n", encoding="utf-8")
+    config = load_pier_job_config(config_path, root=tmp_path)
+    monkeypatch.setattr("copilot_experiments.pier_backend.shutil.which", lambda _name: None)
+
+    with pytest.raises(PierBackendPreflightError, match="docker.*not found"):
+        preflight_pier_backend(config)
+
+
+def test_preflight_pier_backend_checks_docker_compose_and_daemon(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+):
+    config_path = tmp_path / "job.yaml"
+    config_path.write_text("job_name: smoke\n", encoding="utf-8")
+    config = load_pier_job_config(config_path, root=tmp_path)
+    calls: list[list[str]] = []
+
+    def fake_run(command: list[str], **_kwargs):
+        calls.append(command)
+        return CompletedProcess(command, 0, stdout="ok", stderr="")
+
+    monkeypatch.setattr("copilot_experiments.pier_backend.shutil.which", lambda _name: "docker")
+    monkeypatch.setattr("copilot_experiments.pier_backend.subprocess.run", fake_run)
+
+    preflight_pier_backend(config)
+
+    assert calls == [["docker", "compose", "version"], ["docker", "info"]]
+
+
+def test_preflight_pier_backend_reports_unreachable_docker_daemon(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+):
+    config_path = tmp_path / "job.yaml"
+    config_path.write_text("job_name: smoke\n", encoding="utf-8")
+    config = load_pier_job_config(config_path, root=tmp_path)
+
+    def fake_run(command: list[str], **_kwargs):
+        if command == ["docker", "info"]:
+            return CompletedProcess(command, 1, stdout="", stderr="Cannot connect to Docker daemon")
+        return CompletedProcess(command, 0, stdout="ok", stderr="")
+
+    monkeypatch.setattr("copilot_experiments.pier_backend.shutil.which", lambda _name: "docker")
+    monkeypatch.setattr("copilot_experiments.pier_backend.subprocess.run", fake_run)
+
+    with pytest.raises(PierBackendPreflightError, match="Cannot connect to Docker daemon"):
+        preflight_pier_backend(config)
+
+
+def test_cli_run_fails_before_auth_when_pier_backend_preflight_fails(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+):
+    experiments = tmp_path / "experiments"
+    experiments.mkdir()
+    (experiments / "job.yaml").write_text("job_name: smoke\n", encoding="utf-8")
+
+    def fail_preflight(_config):
+        raise PierBackendPreflightError("Docker is unavailable")
+
+    def fail_if_called():
+        pytest.fail("auth should not run after backend preflight failure")
+
+    monkeypatch.setattr("copilot_experiments.cli.preflight_pier_backend", fail_preflight)
+    monkeypatch.setattr("copilot_experiments.cli.preflight_github_token", fail_if_called)
+    monkeypatch.setattr(
+        "copilot_experiments.cli.run_pier_job",
+        lambda _config: pytest.fail("Pier job should not start after backend preflight failure"),
+    )
+
+    result = CliRunner().invoke(app, ["run", "--root", str(tmp_path)])
+
+    assert result.exit_code == 1
+    assert "Pier backend preflight failed" in result.output
+    assert "Docker is unavailable" in result.output
+
+
 @pytest.mark.parametrize(
     "example_root",
     [
diff --git a/tests/test_pier_results.py b/tests/test_pier_results.py
index 0510481..d08b46e 100644
--- a/tests/test_pier_results.py
+++ b/tests/test_pier_results.py
@@ -11,6 +11,7 @@
 from copilot_experiments.index import connect, index_pier_job_dir
 from copilot_experiments.pier_results import (
     build_pier_summary,
+    describe_missing_pier_analysis_source,
     resolve_pier_trial_events,
     write_pier_summary,
 )
@@ -165,6 +166,40 @@ def _make_pier_job_with_trajectory(job_dir: Path) -> Path:
     return job_dir
 
 
+def _make_pier_job_with_harness_error(job_dir: Path) -> Path:
+    _write_json(
+        job_dir / "config.json",
+        {"job_name": "demo-job"},
+    )
+    _write_json(
+        job_dir / "result.json",
+        {
+            "started_at": "2026-01-01T00:00:00Z",
+            "finished_at": "2026-01-01T00:00:05Z",
+            "stats": {"n_errored_trials": 1},
+        },
+    )
+    trial = job_dir / "copilot-cli__textstats__1"
+    _write_json(
+        trial / "result.json",
+        {
+            "trial_name": "copilot-cli__textstats__1",
+            "task_name": "textstats",
+            "started_at": "2026-01-01T00:00:00Z",
+            "finished_at": "2026-01-01T00:00:05Z",
+            "agent_info": {
+                "name": "copilot-cli",
+                "model_info": {"name": "gpt-5-mini"},
+            },
+            "exception_info": {
+                "exception_type": "RuntimeError",
+                "exception_message": "Docker daemon unavailable",
+            },
+        },
+    )
+    return job_dir
+
+
 def test_build_pier_summary_reads_native_copilot_events(tmp_path: Path):
     job_dir = _make_pier_job(tmp_path / "jobs" / "demo-job")
 
@@ -202,6 +237,17 @@ def test_build_pier_summary_reads_trajectory_when_native_events_are_absent(tmp_p
     assert variant["avg_output_tokens"] == 7.0
 
 
+def test_describe_missing_pier_analysis_source_explains_harness_error(tmp_path: Path):
+    job_dir = _make_pier_job_with_harness_error(tmp_path / "jobs" / "demo-job")
+
+    diagnostic = describe_missing_pier_analysis_source(job_dir)
+
+    assert diagnostic is not None
+    assert "failed before a Copilot session was captured" in diagnostic
+    assert "Docker daemon unavailable" in diagnostic
+    assert "result.json" in diagnostic
+
+
 def test_cli_analyze_reads_pier_job_events(tmp_path: Path):
     _make_pier_job(tmp_path / "jobs" / "demo-job")
     runner = CliRunner()
@@ -232,6 +278,22 @@ def test_cli_analyze_reads_pier_job_trajectory_when_events_are_absent(tmp_path:
     assert "view" in result.output
 
 
+def test_cli_analyze_reports_pier_harness_error_when_logs_are_absent(tmp_path: Path):
+    _make_pier_job_with_harness_error(tmp_path / "jobs" / "demo-job")
+    runner = CliRunner()
+
+    result = runner.invoke(
+        app,
+        ["analyze", "demo-job", "--root", str(tmp_path), "--trial", "1"],
+    )
+
+    assert result.exit_code == 1
+    assert "No Copilot session log or trajectory found" in result.output
+    assert "failed before a Copilot session was captured" in result.output
+    assert "Docker daemon" in result.output
+    assert "unavailable" in result.output
+
+
 def test_write_pier_summary_and_index(tmp_path: Path):
     job_dir = _make_pier_job(tmp_path / "jobs" / "demo-job")