CopilotKit/showcase/scripts/cli/_common.sh at main · CodeXiaoHan/CopilotKit · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env bash
# Shared variables and helper functions for the showcase CLI.
# Sourced by bin/showcase — not meant to be executed directly.

# ── Paths ────────────────────────────────────────────────────────────────────

# Absolute path of the showcase root: two directories above the directory that
# holds this file, resolved with cd/pwd so it does not depend on the caller's
# working directory.
SHOWCASE_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
# Compose file located directly under the showcase root.
COMPOSE_FILE="$SHOWCASE_ROOT/docker-compose.local.yml"
# Compose invocation kept as a flat string.
# NOTE(review): an unquoted $COMPOSE_CMD expansion word-splits, so a root path
# containing spaces would break it — confirm how callers expand this.
COMPOSE_CMD="docker compose -f $COMPOSE_FILE"
# Env file whose presence require_env checks.
ENV_FILE="$SHOWCASE_ROOT/.env"
# JSON object mapping slug -> base host port (read by slug_to_port and
# _slot_offset_ports).
PORTS_FILE="$SHOWCASE_ROOT/shared/local-ports.json"
# Compose file under tests/ (not referenced in the visible part of this file).
AIMOCK_COMPOSE="$SHOWCASE_ROOT/tests/docker-compose.integrations.yml"

# ── Output helpers ───────────────────────────────────────────────────────────

die() {
  # Fatal error: write $1 to stderr as a bold-red "✗" line, then terminate the
  # calling shell with status 1.
  local bold_red=$'\033[1;31m' reset=$'\033[0m'
  printf '%s✗ %s%s\n' "$bold_red" "$1" "$reset" >&2
  exit 1
}

info() {
  # Progress message: write $1 to stdout as a cyan "▸" line.
  local cyan=$'\033[0;36m' reset=$'\033[0m'
  printf '%s▸ %s%s\n' "$cyan" "$1" "$reset"
}

warn() {
  # Non-fatal warning: write $1 to stderr as a bold-yellow "⚠" line.
  local bold_yellow=$'\033[1;33m' reset=$'\033[0m'
  printf '%s⚠ %s%s\n' "$bold_yellow" "$1" "$reset" >&2
}

success() {
  # Success message: write $1 to stdout as a green "✓" line.
  local green=$'\033[0;32m' reset=$'\033[0m'
  printf '%s✓ %s%s\n' "$green" "$1" "$reset"
}

# ── Validation helpers ───────────────────────────────────────────────────────

need_slug() {
  # Abort via die unless a non-empty slug was passed as $1 (a missing argument
  # is treated the same as an empty one).
  if [ -z "${1:-}" ]; then
    die "slug required"
  fi
}

require_env() {
  # Abort via die unless $ENV_FILE exists as a regular file.
  if [ ! -f "$ENV_FILE" ]; then
    die "Missing $ENV_FILE. Copy showcase/.env.example to showcase/.env and fill in keys."
  fi
}

# ── Docker / Compose helpers ─────────────────────────────────────────────────

stage_shared() {
  # Dereference tools/, shared-tools/, and _shared/ symlinks into real copies
  # so Docker COPY can follow them (Docker build contexts can't traverse
  # symlinks that point outside the context). `_shared` carries the
  # single-source CVDIAG bootstrap module into each Python integration context.
  #
  # For every integrations/<pkg>/ directory under $SHOWCASE_ROOT, each of the
  # three link names that is a symlink to a directory is removed and replaced
  # by an rsync copy of its target. Links that are absent, are not symlinks,
  # or do not resolve to a directory are left untouched.
  #
  # All loop variables are local: this file is sourced, so anything else would
  # leak into (and could clobber names in) the calling script.
  local pkg_dir link_name link_path target target_parent
  for pkg_dir in "$SHOWCASE_ROOT"/integrations/*/; do
    for link_name in tools shared-tools _shared; do
      # The glob leaves a trailing slash on pkg_dir; strip it to avoid "//".
      link_path="${pkg_dir%/}/$link_name"
      [ -L "$link_path" ] || continue
      target="$(readlink "$link_path")"
      if [[ "$target" != /* ]]; then
        # Resolve relative symlink targets against the link's directory. If the
        # target's parent cannot be entered the link is dangling: skip it
        # rather than let the path collapse to "/<basename>", which could name
        # an unrelated directory at the filesystem root.
        target_parent="$(cd "$(dirname "$link_path")" && cd "$(dirname "$target")" && pwd)" || continue
        target="$target_parent/$(basename "$target")"
      fi
      if [ -d "$target" ]; then
        rm -- "$link_path"
        rsync -a "$target/" "$link_path/"
      fi
    done
  done
}

restore_symlinks() {
  # Put back the tools/, shared-tools/, and _shared/ symlinks that stage_shared
  # replaced with real copies, by checking those paths out of git again. The
  # integrations/*/_shared glob also matches the canonical source dir
  # integrations/_shared (a real tracked dir) — harmless no-op there.
  #
  # Runs in a subshell so the caller's working directory is untouched, and is
  # best-effort: a failed cd or git checkout still yields status 0.
  (
    cd "$SHOWCASE_ROOT" || exit 0
    git checkout -- \
      integrations/*/tools \
      integrations/*/shared-tools \
      integrations/*/_shared \
      2>/dev/null || true
  )
}

slug_to_container() {
  # Print the container name for slug $1: the slug prefixed with "showcase-".
  printf '%s\n' "showcase-${1}"
}

slug_to_port() {
  # Print the base host port recorded for slug $1 in $PORTS_FILE (a flat JSON
  # object of "slug": port pairs). Prints nothing when the slug is not listed.
  #
  # Arguments: $1 - slug (required)
  # Outputs:   the port number on stdout, or nothing
  local slug="${1:?slug required}"
  if command -v jq &>/dev/null; then
    jq -r --arg s "$slug" '.[$s] // empty' "$PORTS_FILE"
  else
    # Fallback when jq is not available. Take only the digits that follow
    # `"<slug>":` — stripping every non-digit from the whole line would fold
    # digits that are part of the slug itself into the port (e.g.
    # "langgraph-v2": 3010 would come out as 23010). The key is matched as a
    # literal string via index(), so regex metacharacters in a slug are inert.
    awk -v key="\"$slug\"" '
      {
        pos = index($0, key)
        if (pos == 0) next
        rest = substr($0, pos + length(key))
        if (match(rest, /^[ \t]*:[ \t]*[0-9]+/)) {
          num = substr(rest, RSTART, RLENGTH)
          gsub(/[^0-9]/, "", num)
          print num
          exit
        }
      }
    ' "$PORTS_FILE"
  fi
}

is_service_healthy() {
  # Succeed (status 0) only when docker reports the health status "healthy"
  # for the container belonging to slug $1. A failed `docker inspect` (for
  # example: no such container) counts as not healthy.
  local slug="${1:?slug required}"
  local name status
  name="$(slug_to_container "$slug")"
  if ! status="$(docker inspect --format='{{.State.Health.Status}}' "$name" 2>/dev/null)"; then
    status="missing"
  fi
  [ "$status" = "healthy" ]
}

wait_healthy() {
  # Poll is_service_healthy for slug $1 every 2 seconds until it succeeds.
  # $2 is the timeout in seconds (default 30); once the elapsed time reaches
  # it while the service is still unhealthy, the script aborts through die.
  local slug="${1:?slug required}"
  local limit="${2:-30}"
  local waited=0
  info "Waiting for $slug to become healthy (timeout ${limit}s)..."
  until is_service_healthy "$slug"; do
    # Deadline is checked before sleeping, so a timeout of 0 fails at once.
    if [ "$waited" -ge "$limit" ]; then
      die "$slug did not become healthy within ${limit}s"
    fi
    sleep 2
    waited=$((waited + 2))
  done
  success "$slug is healthy (${waited}s)"
}

# ── Isolation helpers ───────────────────────────────────────────────────────

# Mutable state for the current --isolate run. Everything starts "inactive".
# NOTE(review): ISOLATE_NAME, ISOLATE_ACTIVE and ISOLATE_TMPDIR are not written
# anywhere in the visible part of this file — presumably set by
# apply_isolation / cmd-test.sh; confirm there.
ISOLATE_NAME=""
# Host-port offset for the claimed slot; _claim_isolate_slot sets it to
# (slot + 1) * 200.
ISOLATE_PORT_OFFSET=0
# Slot number claimed by _claim_isolate_slot; reset to empty by
# _release_isolate_slot.
ISOLATE_SLOT=""
ISOLATE_ACTIVE=false
ISOLATE_TMPDIR=""
# Set true by cmd-test.sh when --keep is parsed; read by restore_isolation.
# Deliberately a namespaced GLOBAL (not a `local` in cmd_test): the EXIT trap
# fires at top-level script exit, after cmd_test has returned and its locals
# have unwound. Initializing it here also shields against a stray `keep`-like
# env var exported by the user flipping teardown behavior.
ISOLATE_KEEP=false

# Runtime state (slot registry + per-run scratch dirs) lives under
# XDG_STATE_HOME, NOT /tmp — /tmp is world-writable (which made stale-slot
# reaping racy) and gets wiped on reboot (which destroyed the registry/run-dir
# state out from under any surviving docker resources). NB this does NOT make
# --keep reboot-proof: container-liveness protection counts only RUNNING
# containers, so after a reboot (or daemon restart / manual docker stop) the
# kept stack's stopped containers no longer protect its slot — the next
# claim's sweep reclaims it, composing the remnants down (see
# _reap_isolate_slot).
_showcase_state_base() {
  # Print (without a trailing newline) the per-user state directory for the
  # showcase CLI: $XDG_STATE_HOME when set and non-empty, otherwise
  # $HOME/.local/state, with /copilotkit/showcase appended.
  local state_home="${XDG_STATE_HOME:-$HOME/.local/state}"
  printf '%s' "$state_home/copilotkit/showcase"
}

# Single-user assumption: the slot registry is PER-USER (XDG state), while
# docker compose project names and host ports are HOST-global. Two different
# UNIX users running --isolate concurrently on one host each get their own
# registry, so neither the slot claim nor the duplicate-name guard can see
# the other user's claims — identical port offsets or same-name projects can
# collide across users. Accepted: dev hosts are effectively single-user.
# Note the pid-liveness checks share this assumption: `kill -0` on another
# user's live pid returns EPERM (read here as "dead"), so cross-user slot
# protection via pid is also unreliable.
# Directory holding one sub-directory per claimed slot, plus the dot-named
# sweep lock and its takeover tombstones.
ISOLATE_SLOT_DIR="$(_showcase_state_base)/slots"
ISOLATE_STALE_THRESHOLD=7200  # 2 hours in seconds — slot-age fallback
# The sweep lock is held only for the duration of one sweep pass (seconds, even
# with all 46 slots populated). A crashed sweeper's leftover lock must not
# disable stale reaping for the full 2-hour SLOT threshold — give the lock its
# own, much shorter staleness threshold.
ISOLATE_SWEEP_LOCK_STALE_THRESHOLD=60  # seconds
# Maximum slot index for --isolate (0 reserved for base stack; 1..N for isolated runs).
ISOLATE_MAX_SLOT=45

# _file_mtime <path> — epoch mtime of a path, or empty when it cannot be
# stat'ed (vanished concurrently, permissions). Callers must treat a
# non-numeric result as "unknown", never as zero.
_file_mtime() {
  # Print the modification time of path $1 as epoch seconds. `stat` takes
  # different flags per platform, so the form is chosen from $OSTYPE (darwin
  # uses -f %m, everything else -c %Y). A failing stat prints nothing and the
  # function still returns 0 — callers must validate the result as numeric.
  case "$OSTYPE" in
    darwin*)
      stat -f %m "$1" 2>/dev/null || true
      ;;
    *)
      stat -c %Y "$1" 2>/dev/null || true
      ;;
  esac
}

# Reap one stale slot: compose any docker remnants of the recorded project
# down (best-effort), then remove the slot's runs/<project> scratch dir AND
# the slot dir itself. Without the runs-dir removal, crashed runs leak orphan
# run dirs under XDG state forever (nothing else cleans them —
# restore_isolation only removes the CURRENT run's dir).
#
# Kept stacks: container-liveness protection applies only while containers
# are RUNNING (the sweep's probe is `docker ps -q`, deliberately — `-aq`
# would let crashed runs' exited containers protect dead slots forever). A
# --keep'd stack whose containers are stopped-but-present (manual `docker
# stop`, daemon restart, host reboot) therefore DOES reach this function:
# its owner pid is dead by design, so the slot is reclaimed. The compose-down
# below keeps that safe — stopped containers and named volumes are removed
# along with the state dirs instead of being stranded with no compose state.
#
# Order matters: runs/<project> FIRST, slot dir LAST. The slot's project
# record is the ONLY pointer to the runs dir — a crash between the two
# removals with the old order (slot first) orphaned the runs dir forever,
# while with this order a surviving slot record simply makes the next sweep
# retry the reap.
_reap_isolate_slot() {
  # Reap one stale slot.
  #
  # Arguments: $1 - slot directory (required)
  #            $2 - compose project recorded for the slot (may be empty)
  # Returns:   0 in every handled case
  #
  # With a project recorded, the order is: validate the record, compose the
  # project down, remove runs/<project>, and only then remove the slot dir.
  # The slot record is the only pointer to the runs dir, so the slot must go
  # LAST — a crash in between then leaves a record for the next sweep to retry
  # instead of an orphaned runs dir.
  local slot_entry="${1:?slot entry required}"
  local slot_proj="${2:-}"
  local runs_dir

  if [ -n "$slot_proj" ]; then
    runs_dir="$(_showcase_state_base)/runs/$slot_proj"

    # Guard 1 — reserved name. 'showcase' is the default stack's compose
    # project and would pass the charset check below, so a record carrying it
    # (corrupt, or written by an older CLI) would point the compose-down —
    # --volumes included — at the user's live default stack. Never trust the
    # record: warn and leave slot and state untouched.
    if [ "$slot_proj" = "showcase" ]; then
      warn "Slot record at $slot_entry names the RESERVED project 'showcase' — that is the LIVE default stack's compose project, so reaping it would compose the default stack down (--volumes included: PocketBase data destroyed). Leaving the slot intact for manual inspection; its runs dir would be $runs_dir"
      return 0
    fi

    # Guard 2 — path traversal. The record comes from a user-writable state
    # file and is about to be interpolated into rm -rf; anything that is not a
    # valid compose project name ([a-z0-9][a-z0-9_-]*) is refused. The slot is
    # left in place too, as evidence for manual inspection.
    if ! [[ "$slot_proj" =~ ^[a-z0-9][a-z0-9_-]*$ ]]; then
      warn "Slot record at $slot_entry names suspicious project '$slot_proj' (path-traversal guard) — leaving the slot intact for manual inspection; its runs dir would be $runs_dir"
      return 0
    fi

    # Best-effort remnant cleanup BEFORE any state is deleted: a stopped kept
    # stack still owns containers and named volumes, and `compose -p` finds
    # them by project label (no compose file needed). Failure is non-fatal.
    docker compose -p "$slot_proj" down --remove-orphans --volumes >/dev/null 2>&1 || true

    # Guarded rm: a concurrent claimant/release can race the same path, and
    # the loser's nonzero exit must not kill the CLI under `set -e`.
    rm -rf "$runs_dir" 2>/dev/null || true
  fi

  rm -rf "$slot_entry" 2>/dev/null || true
}

# Release the sweep lock — but ONLY if it is still ours. The takeover path
# below can legitimately move an over-age lock out from under a slow-but-live
# holder and install a fresh lock of its own; if the original holder then
# blindly removed "$sweep_lock" on its way out, it would destroy the
# TAKEOVER's lock and open the door to a THIRD concurrent sweeper. Ownership
# is the pid file written into the lock dir at acquisition.
_release_sweep_lock() {
  # Release the sweep lock at $1, but only when it is still owned by this
  # process. Ownership is the pid file written into the lock dir when it was
  # acquired.
  #
  # Arguments: $1 - sweep lock directory (required)
  # Returns:   0 in every case; a lock that is not ours is reported via warn
  #            and left exactly as found.
  local sweep_lock="${1:?sweep lock path required}"
  # Lock (or its pid ownership marker) gone entirely: nothing to release and
  # no holder to report — a takeover mv'd it away, or something external
  # cleaned it up. Distinct from the takeover case below, which has an actual
  # current holder's lock that must be left in place.
  if [ ! -d "$sweep_lock" ] || [ ! -f "$sweep_lock/pid" ]; then
    warn "Sweep lock $sweep_lock vanished while we held it (takeover or external cleanup) — leaving as-is"
    return 0
  fi
  local lock_pid
  lock_pid="$(cat "$sweep_lock/pid" 2>/dev/null || true)"
  if [ "$lock_pid" = "$$" ]; then
    # Guarded like every other state-removal rm in this file: a takeover can
    # mv the lock dir away between the ownership check above and this removal,
    # and the resulting nonzero rm must not kill the CLI under `set -e` (the
    # lock is no longer ours to remove either way).
    rm -rf "$sweep_lock" 2>/dev/null || true
  else
    warn "Sweep lock $sweep_lock was taken over while we held it (current holder pid: ${lock_pid:-unknown}) — leaving it in place"
  fi
}

# Claim an isolation slot using atomic mkdir. Slots 1..ISOLATE_MAX_SLOT are
# usable for --isolate runs; slot 0 is reserved for the base (non-isolate)
# stack. Each slot dir contains a "pid" file for stale-detection. The port
# offset is (slot + 1) * 200, so slot 1 → +400, slot 2 → +600, etc. If
# SHOWCASE_ISO_SLOT is set, the picker pins to that slot; otherwise it
# auto-picks the first free slot in 1..ISOLATE_MAX_SLOT.
_claim_isolate_slot() {
  # Claim one isolation slot by atomically creating its directory.
  #
  # Globals read:    ISOLATE_SLOT_DIR, ISOLATE_MAX_SLOT,
  #                  ISOLATE_SWEEP_LOCK_STALE_THRESHOLD, SHOWCASE_ISO_SLOT
  # Globals written: ISOLATE_SLOT (claimed slot number),
  #                  ISOLATE_PORT_OFFSET ((slot + 1) * 200)
  # Returns:         0 on success; every failure aborts through die.
  #
  # Phases: (1) remove over-age takeover tombstones, (2) run the stale-slot
  # sweep if the sweep lock can be taken, (3) claim either the pinned slot
  # (SHOWCASE_ISO_SLOT) or the first free one, (4) write the owner pid file.
  mkdir -p "$ISOLATE_SLOT_DIR"

  # Reclaim crashed-takeover tombstones: a sweeper that died between the
  # takeover mv and its rm -rf (below) leaves .sweep.lock.tomb.<pid> behind
  # forever — dot-named, so neither the sweep glob nor the claim loop ever
  # sees it, and nothing else cleans it. Age them by the LOCK threshold: a
  # fresh tombstone may belong to a takeover in flight (mv done, rm pending),
  # so only over-age ones are removed.
  local tomb tomb_mtime
  for tomb in "$ISOLATE_SLOT_DIR"/.sweep.lock.tomb.*; do
    [ -e "$tomb" ] || continue
    tomb_mtime="$(_file_mtime "$tomb")"
    [[ "$tomb_mtime" =~ ^[0-9]+$ ]] || continue
    if [ $(( $(date +%s) - tomb_mtime )) -gt "$ISOLATE_SWEEP_LOCK_STALE_THRESHOLD" ]; then
      # This cleanup runs OUTSIDE the sweep lock by design: two claimants can
      # race the same removal, and the loser's nonzero rm must not kill the
      # CLI under `set -e` (the tombstone is gone either way).
      rm -rf "$tomb" 2>/dev/null || true
    fi
  done

  # Serialize the stale sweep with a lock dir. Without it, two concurrent
  # claimants can both observe slot N stale: A reaps + re-claims it (writing a
  # live pid), then B reaps A's FRESH claim based on its stale observation and
  # claims the same slot — two owners, identical port offsets. The lock is
  # advisory and non-blocking: if another process holds it, we SKIP the sweep
  # entirely and go straight to the claim loop. The dot-name keeps the lock
  # out of the sweep's [0-9]* glob and the claim loop's numeric slot names.
  local sweep_lock="$ISOLATE_SLOT_DIR/.sweep.lock"
  local have_sweep_lock=false
  if mkdir "$sweep_lock" 2>/dev/null; then
    echo "$$" > "$sweep_lock/pid"   # ownership marker for _release_sweep_lock
    have_sweep_lock=true
  else
    # Lock held — but a sweeper that crashed mid-sweep would leave it behind
    # forever, permanently disabling stale reaping. Take over an over-age lock
    # (dedicated short threshold: the lock is held for seconds, not hours);
    # otherwise (fresh lock, or lock vanished between our mkdir and the stat)
    # skip the sweep this round. A LIVE sweeper refreshes the lock mtime every
    # slot iteration, so an over-age lock really does mean a crashed/wedged
    # holder.
    local lock_mtime
    lock_mtime="$(_file_mtime "$sweep_lock")"
    if [[ "$lock_mtime" =~ ^[0-9]+$ ]] \
      && [ $(( $(date +%s) - lock_mtime )) -gt "$ISOLATE_SWEEP_LOCK_STALE_THRESHOLD" ]; then
      # Atomic takeover: rename the stale lock aside to a unique tombstone
      # first. Two claimants can BOTH observe the lock over-age; with a plain
      # rm+mkdir the slower one could rm the faster one's FRESH replacement
      # lock and retake it — two concurrent sweepers. rename(2) is atomic:
      # exactly one mv wins, the loser's mv fails and it simply skips the
      # sweep this round. A crash between mv and rm leaves only a dot-named
      # tombstone, reclaimed once over-age by the cleanup loop above.
      local lock_tombstone="$ISOLATE_SLOT_DIR/.sweep.lock.tomb.$$"
      if mv "$sweep_lock" "$lock_tombstone" 2>/dev/null; then
        warn "Removing stale sweep lock (crashed sweeper?): $sweep_lock"
        # Guarded: mv preserves the lock's (already over-age) mtime, so this
        # tombstone is immediately over-age too — concurrent claimants'
        # tombstone cleanup legitimately races this removal.
        rm -rf "$lock_tombstone" 2>/dev/null || true
        if mkdir "$sweep_lock" 2>/dev/null; then
          echo "$$" > "$sweep_lock/pid"   # ownership marker for _release_sweep_lock
          have_sweep_lock=true
        fi
      fi
    fi
  fi

  if [ "$have_sweep_lock" = true ]; then
    _sweep_isolate_slots
    _release_sweep_lock "$sweep_lock"
  fi

  local slot_dir
  if [ -n "${SHOWCASE_ISO_SLOT:-}" ]; then
    # Pinned path
    local pinned="$SHOWCASE_ISO_SLOT"
    [[ "$pinned" =~ ^[0-9]+$ ]] || die "SHOWCASE_ISO_SLOT must be a positive integer, got: $pinned"
    # Canonicalize: strip leading zeros ("08" -> "8", "000" -> "0"). `[ ]`
    # compares decimally but $(( )) reads a leading 0 as octal, so an
    # un-normalized "08" passed validation and then crashed the offset
    # arithmetic below, while "010" claimed dir "010" yet computed slot 8's
    # port offset — colliding with a real slot 8.
    pinned="${pinned#"${pinned%%[!0]*}"}"
    pinned="${pinned:-0}"
    [ "$pinned" -ge 1 ] || die "slot 0 is reserved for the base stack — use 1-$ISOLATE_MAX_SLOT"
    [ "$pinned" -le "$ISOLATE_MAX_SLOT" ] || die "SHOWCASE_ISO_SLOT=$pinned exceeds ISOLATE_MAX_SLOT=$ISOLATE_MAX_SLOT"

    slot_dir="$ISOLATE_SLOT_DIR/$pinned"
    if ! mkdir "$slot_dir" 2>/dev/null; then
      # EEXIST: consult liveness
      local liveness
      liveness=$(_slot_liveness "$pinned")
      if [ "$liveness" = "live" ]; then
        die "Slot $pinned is already in use (liveness=$liveness, containers/pid) — pick a different SHOWCASE_ISO_SLOT or clear it first"
      fi
      # stale or inconclusive: reap and retry once
      local pinned_proj
      pinned_proj="$(cat "$slot_dir/project" 2>/dev/null || true)"
      _reap_isolate_slot "$slot_dir" "$pinned_proj" || true
      mkdir "$slot_dir" 2>/dev/null || die "Slot $pinned could not be reclaimed after reap — check $slot_dir manually"
    fi
    # Port-probe: give the (still empty) slot dir back before aborting.
    if ! _slot_ports_free "$pinned"; then
      rmdir "$slot_dir" 2>/dev/null || true
      die "Slot $pinned ports are held by a foreign process — see info messages above; clear conflicts or pick a different SHOWCASE_ISO_SLOT"
    fi
    ISOLATE_SLOT="$pinned"
  else
    # Auto-pick path: loop 1..ISOLATE_MAX_SLOT (slot 0 reserved)
    local n=1
    while [ "$n" -le "$ISOLATE_MAX_SLOT" ]; do
      slot_dir="$ISOLATE_SLOT_DIR/$n"
      if mkdir "$slot_dir" 2>/dev/null; then
        if _slot_ports_free "$n"; then
          ISOLATE_SLOT="$n"
          break
        else
          rmdir "$slot_dir" 2>/dev/null || true
          info "Slot $n ports held, trying next"
          # Benign race: between our rmdir and a later mkdir attempt, a
          # concurrent claimant can mkdir this same slot dir. That's fine —
          # mkdir is the atomic synchronization point, so only one process can
          # hold a given slot at a time. The concurrent claimant wins; we
          # advance to n+1 and no double-claim occurs.
        fi
      fi
      n=$((n + 1))
    done
    [ -n "${ISOLATE_SLOT:-}" ] || die "No isolation slots available (1-$ISOLATE_MAX_SLOT exhausted)"
  fi

  # Common post-claim: record ownership, derive the port offset.
  echo "$$" > "$ISOLATE_SLOT_DIR/$ISOLATE_SLOT/pid"
  ISOLATE_PORT_OFFSET=$(( (ISOLATE_SLOT + 1) * 200 ))
  return 0
}

# Classify a single isolation slot as live | stale | inconclusive — pure
# classification, no reaping, no info logging. Shared between
# _sweep_isolate_slots (which reaps stale slots) and the picker (which avoids
# binding to live slots). Always prints exactly one word to stdout and exits 0.
#
# Signals (in order, matching the sweeper's documented staleness contract):
#   1. Compose-project liveness — live containers under the slot's recorded
#      compose project → live. Docker-ps failure → inconclusive (warn and
#      leave it alone, same as the sweeper).
#   2. Owning-PID liveness — pid file present + numeric + kill -0 succeeds
#      → live. Numeric pid but kill -0 fails → stale.
#   3. Project recorded + no pid file at all → stale (claim writes the pid
#      file BEFORE the project record, so missing pid means owner state is
#      genuinely gone).
#   4. Age fallback — pid check inconclusive (pid file missing on a
#      project-less legacy slot, OR present-but-empty/non-numeric on any
#      slot) AND age > ISOLATE_STALE_THRESHOLD → stale.
#   5. Otherwise → inconclusive (slot dir vanished mid-check, fresh slot
#      whose pid write hasn't landed yet, etc.).
_slot_liveness() {
  # Classify slot $1 and print exactly one word — live, stale or inconclusive —
  # always returning 0. Pure classification: nothing is removed here.
  #
  # Checks run in this order and the first decisive one wins:
  #   a. slot dir missing                              -> inconclusive
  #   b. project recorded: docker ps fails             -> inconclusive (+ warn)
  #                        running containers exist    -> live
  #   c. numeric pid on record: kill -0 succeeds       -> live
  #                             kill -0 fails          -> stale
  #   d. project recorded and no pid file at all       -> stale
  #   e. slot dir older than ISOLATE_STALE_THRESHOLD   -> stale
  #   f. anything else                                 -> inconclusive
  local slot="${1:?slot required}"
  local dir="$ISOLATE_SLOT_DIR/$slot"

  # (a)
  [ -d "$dir" ] || { printf '%s\n' inconclusive; return 0; }

  # (b) Only RUNNING containers count (`docker ps -q`). A docker failure is
  # not the same as "no containers": if we cannot ask, we do not judge.
  local project running
  project="$(cat "$dir/project" 2>/dev/null || true)"
  if [ -n "$project" ]; then
    if ! running="$(docker ps -q --filter "label=com.docker.compose.project=$project" 2>/dev/null)"; then
      warn "Cannot verify liveness of slot $slot (docker ps failed) — leaving it alone"
      printf '%s\n' inconclusive
      return 0
    fi
    [ -z "$running" ] || { printf '%s\n' live; return 0; }
  fi

  # (c) Owner pid, when the pid file holds a plain number.
  local pid_path="$dir/pid"
  local owner="" have_pid_file=false
  if [ -f "$pid_path" ]; then
    have_pid_file=true
    owner="$(cat "$pid_path" 2>/dev/null || true)"
  fi
  if [[ "$owner" =~ ^[0-9]+$ ]]; then
    if kill -0 "$owner" 2>/dev/null; then
      printf '%s\n' live
    else
      printf '%s\n' stale
    fi
    return 0
  fi

  # (d) The claim writes the pid file before the project record, so a project
  # without any pid file means the owner state is gone.
  if [ -n "$project" ] && [ "$have_pid_file" = false ]; then
    printf '%s\n' stale
    return 0
  fi

  # (e) Age fallback for an inconclusive pid check. A non-numeric mtime means
  # "unknown" and is never treated as old.
  local mtime
  mtime="$(_file_mtime "$dir")"
  if [[ "$mtime" =~ ^[0-9]+$ ]] \
    && [ $(( $(date +%s) - mtime )) -gt "$ISOLATE_STALE_THRESHOLD" ]; then
    printf '%s\n' stale
    return 0
  fi

  # (f)
  printf '%s\n' inconclusive
  return 0
}

# Sweep stale slots. Caller (_claim_isolate_slot) MUST hold .sweep.lock.
_sweep_isolate_slots() {
  # Walk every numeric slot dir and reap the ones _slot_liveness classifies as
  # stale. The caller (_claim_isolate_slot) MUST hold .sweep.lock.
  #
  # Slots classified live or inconclusive are never touched: live means
  # running containers or a live owner pid; inconclusive covers a docker-ps
  # failure, a slot that vanished mid-check, or a fresh slot whose pid write
  # has not landed yet.
  #
  # For a stale slot the evidence is read again purely to word the info line
  # (the classifier stays side-effect free), in this order:
  #   1. numeric pid on record           -> "PID <n> dead"
  #   2. project recorded, no pid file   -> owner state is gone (the claim
  #                                         writes pid BEFORE project)
  #   3. otherwise                       -> age fallback: reaped only when the
  #                                         slot dir is older than
  #                                         ISOLATE_STALE_THRESHOLD
  local lock_dir="$ISOLATE_SLOT_DIR/.sweep.lock"
  local entry name verdict project owner_pid has_pid_file
  local reason mtime age evidence

  for entry in "$ISOLATE_SLOT_DIR"/[0-9]*; do
    [ -d "$entry" ] || continue

    # Heartbeat: refresh the lock mtime on every iteration so a live sweep
    # never looks over-age to a concurrent claimant. Refresh-only, NEVER
    # create (-c, behind the -d guard): recreating the lock as a plain file
    # after a takeover mv'd the dir away would make the takeover's mkdir fail.
    # A vanished lock or failed touch is non-fatal.
    if [ -d "$lock_dir" ]; then
      touch -c "$lock_dir" 2>/dev/null || true
    fi

    name="$(basename "$entry")"
    verdict="$(_slot_liveness "$name")"
    case "$verdict" in
      live | inconclusive) continue ;;
    esac

    # Stale: gather the evidence behind the verdict.
    project="$(cat "$entry/project" 2>/dev/null || true)"
    owner_pid=""
    has_pid_file=false
    if [ -f "$entry/pid" ]; then
      has_pid_file=true
      owner_pid="$(cat "$entry/pid" 2>/dev/null || true)"
    fi

    reason=""
    if [[ "$owner_pid" =~ ^[0-9]+$ ]]; then
      reason="PID $owner_pid dead"
    elif [ -n "$project" ] && [ "$has_pid_file" = false ]; then
      # A pid file that EXISTS but is empty/non-numeric is a different case
      # (possibly a truncated write by a live owner) and goes to the age
      # fallback below instead.
      reason="project $project has no live containers and no recorded owner"
    else
      # Age fallback. The mtime is validated first: a concurrent release can
      # remove the slot between the glob and this stat, and an empty value
      # inside $(( )) is a syntax error that would kill the CLI under
      # `set -e`. A vanished slot needs no reaping.
      mtime="$(_file_mtime "$entry")"
      [[ "$mtime" =~ ^[0-9]+$ ]] || continue
      age=$(( $(date +%s) - mtime ))
      if [ "$age" -gt "$ISOLATE_STALE_THRESHOLD" ]; then
        # Surface WHY the pid check was inconclusive.
        evidence="no pid file"
        if [ "$has_pid_file" = true ]; then
          evidence="pid file present but empty/non-numeric"
        fi
        reason="age ${age}s > ${ISOLATE_STALE_THRESHOLD}s; owner-pid check inconclusive: $evidence"
      fi
    fi

    [ -n "$reason" ] || continue
    info "Attempting to reclaim stale slot $name ($reason)"
    _reap_isolate_slot "$entry" "$project"
  done
}

# Give back the isolation slot claimed by this process.
#
# Globals:
#   ISOLATE_SLOT      (read, then reset to "") — name of the claimed slot
#   ISOLATE_SLOT_DIR  (read)                   — parent dir holding all slots
#
# Only the per-slot directory is removed. The parent slots dir is deliberately
# LEFT IN PLACE: removing it here raced a concurrent claimer between its
# `mkdir -p` of the parent and its per-slot mkdir — every slot mkdir then
# failed ENOENT and the claimer died "No isolation slots available". An empty
# slots dir under XDG state is harmless.
_release_isolate_slot() {
  if [[ -n "$ISOLATE_SLOT" ]]; then
    local claimed_dir="$ISOLATE_SLOT_DIR/$ISOLATE_SLOT"
    if [[ -d "$claimed_dir" ]]; then
      # Best effort: a failed removal must not abort the caller.
      rm -rf "$claimed_dir" 2>/dev/null || true
    fi
  fi
  # Always forget the claim, even when there was nothing on disk to remove.
  ISOLATE_SLOT=""
}

# Print every host port that the given isolation slot will bind, one per line.
# Includes all slug ports from PORTS_FILE and the four infra base ports.
# Each output port = base + (slot+1)*200.
#
# Globals:
#   PORTS_FILE        (read) — JSON object mapping names to base ports
#   ISOLATE_MAX_SLOT  (read) — highest slot number accepted
# Arguments:
#   $1 — slot number, a non-negative decimal integer (zero-padding tolerated)
# Outputs:
#   offset slug ports (in PORTS_FILE order) followed by the offset infra ports
# Exits (via die) when the slot is not a non-negative integer or exceeds
# ISOLATE_MAX_SLOT. Entries in PORTS_FILE whose value is not a plain
# non-negative integer are skipped rather than fed to shell arithmetic.
_slot_offset_ports() {
  local slot="${1:?slot required}"

  # Validate: must be a non-negative integer. The regex is matched against the
  # WHOLE string, so multi-line input (e.g. $'1\nfoo') is rejected too — a
  # line-oriented grep would accept it on the strength of its first line.
  if ! [[ "$slot" =~ ^[0-9]+$ ]]; then
    die "_slot_offset_ports: slot must be a non-negative integer, got: $slot"
  fi
  # Force base 10: a zero-padded slot such as "08" would otherwise be parsed
  # as (invalid) octal by the arithmetic below and abort the expansion.
  slot=$(( 10#$slot ))
  if [ "$slot" -gt "$ISOLATE_MAX_SLOT" ]; then
    die "_slot_offset_ports: slot $slot exceeds ISOLATE_MAX_SLOT ($ISOLATE_MAX_SLOT)"
  fi

  local offset=$(( (slot + 1) * 200 ))
  local infra_ports=(4010 8090 3210 8081)

  # Slug ports from PORTS_FILE: jq when available, otherwise a grep/sed
  # fallback that extracts the digits following each "key": pair.
  local port_values
  if command -v jq &>/dev/null; then
    port_values="$(jq -r 'to_entries[] | .value' "$PORTS_FILE" 2>/dev/null)"
  else
    port_values="$(grep -o '"[^"]*"[[:space:]]*:[[:space:]]*[0-9]*' "$PORTS_FILE" | sed 's/.*:[[:space:]]*//')"
  fi

  # `base` is local so the loops below cannot clobber a caller's variable.
  local base
  while IFS= read -r base; do
    # Only plain integers reach $(( )). Anything else (empty line, a string
    # or null value emitted by jq) would be evaluated as a variable name and
    # silently yield a bogus port equal to the bare offset.
    [[ "$base" =~ ^[0-9]+$ ]] || continue
    printf '%d\n' "$(( 10#$base + offset ))"
  done <<< "$port_values"

  # Infra ports
  for base in "${infra_ports[@]}"; do
    printf '%d\n' "$(( base + offset ))"
  done
}

# _slot_ports_free <slot> — probe every port the slot would bind for non-self
# listeners. Returns 0 if all ports are free (or only held by this slot's own
# compose project), 1 if any port is held by a foreign process. Emits one
# `info` line per held port. Requires lsof (matches cmd-doctor.sh convention).
_slot_ports_free() {
  local slot="${1:?slot required}"
  if ! command -v lsof &>/dev/null; then
    die "--isolate requires lsof; install it"
  fi

  local slot_proj=""
  local slot_proj_file="$ISOLATE_SLOT_DIR/$slot/project"
  if [ -f "$slot_proj_file" ]; then
    slot_proj="$(cat "$slot_proj_file" 2>/dev/null || true)"
  fi

  local liveness=""
  local any_held=0
  local port
  while IFS= read -r port; do
    [ -z "$port" ] && continue
    local listeners
    listeners="$(lsof -i :"$port" -sTCP:LISTEN -P -n 2>/dev/null | tail -n +2 || true)"
    [ -z "$listeners" ] && continue

    local line
    while IFS= read -r line; do
      [ -z "$line" ] && continue
      local proc_name
      proc_name="$(printf '%s\n' "$line" | awk '{print $1}')"
      # Own-project filter: a docker/com.docker listener on a slot whose own
      # compose project is recorded and live is treated as the slot's own
      # binding, not a foreign hold.
      if printf '%s' "$proc_name" | grep -qiE 'docker|com\.docker'; then
        if [ -n "$slot_proj" ]; then
          if [ -z "$liveness" ]; then
            liveness="$(_slot_liveness "$slot")"
          fi
          if [ "$liveness" = "live" ]; then
            continue
          fi
        fi
      fi
      info "Slot $slot port $port held by $proc_name"
      any_held=1
    done <<< "$listeners"
  done < <(_slot_offset_ports "$slot")

  if [ "$any_held" -eq 0 ]; then
    return 0
  fi
  return 1
}

# _slot_state <slot> — describe one slot as a single pipe-delimited line:
#   slot|dir|pid|liveness|ports|offset|project
#
# Fields:
#   dir      "present" / "absent" — whether $ISOLATE_SLOT_DIR/<slot> exists
#   pid      contents of the slot's pid file when purely numeric, else "-"
#   liveness whatever _slot_liveness prints for the slot
#   ports    "free" / "held" from _slot_ports_free, "?" when lsof is missing,
#            "-" for an absent slot dir (no probe, keeps the
#            `bin/showcase slots` table tidy)
#   offset   0 for slot "0", otherwise (slot+1)*200
#   project  contents of the slot's project file when non-empty, else "-"
#
# Returns 0 after printing the line.
_slot_state() {
  local slot="${1:?slot required}"
  local slot_dir="$ISOLATE_SLOT_DIR/$slot"

  local dir_state
  if [ -d "$slot_dir" ]; then
    dir_state="present"
  else
    dir_state="absent"
  fi

  # Owner pid: only surfaced when the file holds nothing but digits.
  local owner_pid="-" pid_text
  if [ -f "$slot_dir/pid" ]; then
    pid_text="$(cat "$slot_dir/pid" 2>/dev/null || true)"
    case "$pid_text" in
      '' | *[!0-9]*) ;;              # empty or non-numeric → keep "-"
      *) owner_pid="$pid_text" ;;
    esac
  fi

  # Recorded compose project: an empty record is shown as "-".
  local project="-" project_text
  if [ -f "$slot_dir/project" ]; then
    project_text="$(cat "$slot_dir/project" 2>/dev/null || true)"
    project="${project_text:--}"
  fi

  local liveness
  liveness="$(_slot_liveness "$slot")"

  # Port probe only for slots that exist on disk; its info output is muted.
  local ports="-"
  if [ "$dir_state" = "present" ]; then
    ports="?"
    if command -v lsof >/dev/null 2>&1; then
      ports="held"
      if _slot_ports_free "$slot" >/dev/null 2>&1; then
        ports="free"
      fi
    fi
  fi

  # NOTE(review): slot 0 is reported with offset 0 here, while the port probe
  # above goes through _slot_offset_ports, which applies (slot+1)*200 to every
  # slot including 0 — confirm which of the two is intended.
  local offset=0
  if [ "$slot" != "0" ]; then
    offset=$(( (slot + 1) * 200 ))
  fi

  printf '%s|%s|%s|%s|%s|%s|%s\n' \
    "$slot" "$dir_state" "$owner_pid" "$liveness" "$ports" "$offset" "$project"
  return 0
}

# Contract: callers MUST arm `trap restore_isolation EXIT` BEFORE calling this
# function (cmd-test.sh does). Every die() below — invalid name, slot
# exhaustion, duplicate-name conflict, rewriter failure — relies on that trap
# for cleanup of the claimed slot (and, once created, the runs/<name> dir).
apply_isolation() {
  local name="${1:-}"
  # Slug the run is scoped to (from `showcase test <slug>`). Used below to
  # override the persistent stack's hardcoded LOCAL_SERVICES_JSON — that value
  # points at langgraph-python's agentic-chat cell for fast N=1 local demos, so
  # an iso stack for a DIFFERENT slug would inherit the wrong roster and the
  # harness's railway-services local-injection seam would enumerate the wrong
  # service (discovery.railway-services.local-injection count:1 names:["showcase-langgraph-python"]).
  local slug="${2:-}"
  # NB: ISOLATE_ACTIVE is deliberately NOT set here. cmd-test.sh arms
  # `trap restore_isolation EXIT` BEFORE calling this function, so if we
  # flipped it true before COMPOSE_CMD is repointed at the isolated project,
  # any die() below (invalid name, slot exhaustion) would make the trap run
  # `$COMPOSE_CMD down` against the ORIGINAL compose file — silently tearing
  # down the user's live DEFAULT stack. It is set only after the repoint.

  # docker compose project names must start with a lowercase letter or digit,
  # followed by lowercase letters, digits, '-' or '_' ([a-z0-9][a-z0-9_-]*).
  # Reject (or normalize) anything else so the user gets a clear error instead
  # of an opaque compose failure. We normalize-with-warn for ergonomic CLI use.
  if [ -n "$name" ] && ! [[ "$name" =~ ^[a-z0-9][a-z0-9_-]*$ ]]; then
    local lowered
    lowered="$(printf '%s' "$name" | tr '[:upper:]' '[:lower:]')"
    if [[ "$lowered" =~ ^[a-z0-9][a-z0-9_-]*$ ]]; then
      warn "Isolation name '$name' has uppercase chars; lowercasing to '$lowered' (docker compose project-name constraint)"
      name="$lowered"
    else
      die "Invalid --isolate name '$name': must start with a lowercase letter or digit, then lowercase letters, digits, '-' or '_' (docker compose project-name constraint)"
    fi
  fi

  # Reserved name: 'showcase' IS the default stack's compose project name
  # (docker compose defaults the project name to the directory name). It
  # passes the charset check, the container-name rewrite showcase- →
  # showcase- is a no-op, and the idempotent pre-down below would then run
  # `--project-name showcase down --remove-orphans --volumes` against the
  # user's LIVE DEFAULT stack — bypassing every other guard in this file.
  # Checked AFTER the lowercase normalization (so 'Showcase' is caught too)
  # and BEFORE any compose command or state write.
  if [ "$name" = "showcase" ]; then
    die "Isolation name 'showcase' is reserved: it collides with the default stack's compose project name (compose defaults the project to the directory name), so --isolate showcase would tear down the live default stack — pick another name"
  fi

  # Guard: clean up stale .iso-bak files from a prior botched run that
  # mutated originals in-place (the old approach). This makes migration safe.
  # The mv's are race-guarded: two concurrent runs can both see the same stale
  # backup, and the loser's mv (the FINAL command of its AND-list — final-
  # command failures DO trip set -e) would otherwise die pre-claim with a raw
  # error. The survivor's restore wins; the loser proceeds with the restored
  # originals.
  if [ -f "${PORTS_FILE}.iso-bak" ] || [ -f "${COMPOSE_FILE}.iso-bak" ]; then
    warn "Stale .iso-bak files found from a prior crash — restoring originals"
    [ -f "${PORTS_FILE}.iso-bak" ] && mv "${PORTS_FILE}.iso-bak" "$PORTS_FILE" 2>/dev/null || true
    [ -f "${COMPOSE_FILE}.iso-bak" ] && mv "${COMPOSE_FILE}.iso-bak" "$COMPOSE_FILE" 2>/dev/null || true
  fi

  # Claim a slot for unique port offsets
  _claim_isolate_slot

  # Build the isolation name, incorporating the slot for uniqueness
  if [ -z "$name" ]; then
    name="showcase-iso${ISOLATE_SLOT}"
  fi

  ISOLATE_NAME="$name"
  export COMPOSE_PROJECT_NAME="$name"

  # Duplicate-name guard, claim-then-verify. The slot registry only enforces
  # SLOT uniqueness, but the idempotent pre-down below keys on the compose
  # project NAME: a second run reusing a live explicit name would get a
  # different slot yet the same compose project — its pre-down would silently
  # tear down the first run's containers mid-test (or a --keep-parked stack),
  # and two slots recording the same project would corrupt the liveness-reaping
  # signal. Re-running a name after clean teardown still works: the old slot
  # was released, so no record remains.
  #
  # We record our project on our own slot FIRST, and only THEN scan the other
  # slots. (Scan-then-write was a TOCTOU hole: two concurrent same-name claims
  # could both pass the scan and both record the name.) With write-then-scan,
  # the later writer of any concurrent pair is guaranteed to see the earlier
  # writer's record. Backoff is deterministic: we lose against any conflicting
  # record that does NOT strictly postdate ours (older or equal mtime — a
  # strictly NEWER record means the other claimant wrote after us, so its own
  # scan sees our record and IT backs off). Established runs always have older
  # records and therefore always win; two same-second claimants may BOTH back
  # off, which is safe (the names were colliding anyway — nobody tears down a
  # stack they don't own).
  #
  # The verify runs BEFORE the runs/<name> dir is created, so on the
  # conflict-die path ISOLATE_TMPDIR is still unset and the loser's EXIT-trap
  # cleanup removes ONLY its own slot dir — it can never touch the winner's
  # run dir.
  local our_record="$ISOLATE_SLOT_DIR/$ISOLATE_SLOT/project"
  echo "$name" > "$our_record"
  local our_mtime
  our_mtime="$(_file_mtime "$our_record")"
  local other_slot conflict_slot=""
  for other_slot in "$ISOLATE_SLOT_DIR"/[0-9]*; do
    [ -d "$other_slot" ] || continue
    local other_num
    other_num="$(basename "$other_slot")"
    [[ "$other_num" =~ ^[0-9]+$ ]] || continue
    if [ "$other_num" = "$ISOLATE_SLOT" ]; then
      continue
    fi
    local other_proj
    other_proj="$(cat "$other_slot/project" 2>/dev/null || true)"
    [ "$other_proj" = "$name" ] || continue
    local other_mtime
    other_mtime="$(_file_mtime "$other_slot/project")"
    # Record vanished between the read and the stat (a concurrent loser
    # backing off, or a sweep) — no conflict.
    [[ "$other_mtime" =~ ^[0-9]+$ ]] || continue
    if ! [[ "$our_mtime" =~ ^[0-9]+$ ]] || [ "$other_mtime" -le "$our_mtime" ]; then
      conflict_slot="$other_num"
      break
    fi
    # Other record strictly postdates ours → the other claimant is the loser
    # of this pair (its post-write scan sees our older record); keep scanning.
  done
  if [ -n "$conflict_slot" ]; then
    die "isolate name '$name' is already in use by slot $conflict_slot — pick another name, or tear the existing stack down first: docker compose -p $name down --remove-orphans --volumes (if no such run exists, the record may be stale — the sweep is skipped while another run holds the lock; re-running usually resolves it)"
  fi

  # The rewriters below need python3 — check now, with a clear message, while
  # the runs/<name> dir does not exist yet (a die here leaves only our slot
  # for the EXIT trap to clean).
  command -v python3 >/dev/null 2>&1 || die "python3 is required for --isolate"

  # Create per-run scratch dir for overlay copies (originals stay untouched).
  # Keyed by the finalized project name (not the PID) so a --keep'd run is
  # locatable for manual teardown, and lives under XDG state, not /tmp.
  ISOLATE_TMPDIR="$(_showcase_state_base)/runs/$name"
  mkdir -p "$ISOLATE_TMPDIR"

  # Generate offset ports file in the temp dir
  local tmp_ports="$ISOLATE_TMPDIR/local-ports.json"
  python3 -c "
import json, sys
with open('$PORTS_FILE') as f:
    ports = json.load(f)
offset = {k: v + $ISOLATE_PORT_OFFSET for k, v in ports.items()}
with open('$tmp_ports', 'w') as f:
    json.dump(offset, f, indent=2)
    f.write('\n')
"

  # Generate offset compose file in the temp dir
  local tmp_compose="$ISOLATE_TMPDIR/docker-compose.local.yml"
  # Pass slug via env var instead of bash-interpolating into the python
  # source — a slug containing a single quote would break the python literal.
  # Internal-tool risk only (slug is developer-typed), but cheap to harden.
  SHOWCASE_ISO_SLUG="$slug" python3 -c "
import os, re
with open('$COMPOSE_FILE') as f:
    content = f.read()

def offset_port(m):
    indent = m.group(1)
    host = int(m.group(2))
    container = m.group(3)
    return f'{indent}- \"{host + $ISOLATE_PORT_OFFSET}:{container}\"'

content = re.sub(r'(\s+)- \"(\d+):(\d+)\"', offset_port, content)
content = content.replace('container_name: showcase-', 'container_name: $name-')

# Rewrite relative paths to absolute, anchored at SHOWCASE_ROOT. Without this,
# docker compose resolves them against the temp dir holding the rewritten
# compose file and fails (env_file: .env, build: ./pocketbase, volume mounts).
# We touch: build context (./xxx and 'context: ./xxx'), volumes (\"- ./xxx:\"),
# and env_file: .env / .env.local style references.
ROOT = '$SHOWCASE_ROOT'

import os.path as _osp
PARENT = _osp.dirname(ROOT.rstrip('/'))

def _abs(prefix, tail, base):
    return prefix + base.rstrip('/') + '/' + tail

# build: ../foo  /  build: ../   →  rooted at <parent-of-showcase>
content = re.sub(r'(\s+build:\s+)\.\./?([^\n]*)', lambda m: _abs(m.group(1), m.group(2), PARENT), content)
# build: ./foo                    →  rooted at <showcase>
content = re.sub(r'(\s+build:\s+)\./([^\n]+)', lambda m: _abs(m.group(1), m.group(2), ROOT), content)
# context: ../...                 →  rooted at <parent>
content = re.sub(r'(\s+context:\s+)\.\./?([^\n]*)', lambda m: _abs(m.group(1), m.group(2), PARENT), content)
# context: ./foo                  →  rooted at <showcase>
content = re.sub(r'(\s+context:\s+)\./([^\n]+)', lambda m: _abs(m.group(1), m.group(2), ROOT), content)
# dockerfile: ./foo
content = re.sub(r'(\s+dockerfile:\s+)\./([^\n]+)', lambda m: _abs(m.group(1), m.group(2), ROOT), content)
# volumes:  - ./foo:/bar    →  - <showcase>/foo:/bar
content = re.sub(r'(\s+-\s+)\./([^:\n]+:)', lambda m: _abs(m.group(1), m.group(2), ROOT), content)
# env_file: .env            →  <showcase>/.env
content = re.sub(r'(\s+env_file:\s+)\.env(\b)', lambda m: m.group(1) + ROOT + '/.env' + m.group(2), content)

# Per-slug LOCAL_SERVICES_JSON override. The persistent stack hardcodes the
# roster to langgraph-python's agentic-chat (a fast N=1 local-demo default).
# An iso stack scoped to a DIFFERENT slug would inherit that value and the
# harness's railway-services local-injection seam would enumerate the wrong
# service. Rewrite the line to point at the requested slug. Demos are sourced
# from the slug's manifest.yaml; if absent or unparseable, fall back to the
# representative d5 cell ('agentic-chat') so the iso run still targets the
# right container — just with a narrower demo set than d6 would normally use.
SLUG = os.environ.get('SHOWCASE_ISO_SLUG', '')
if SLUG:
    import json as _json
    _os = os
    demos = []
    for _mp in (
        _osp.join(ROOT, 'integrations', SLUG, 'manifest.yaml'),
        _osp.join(ROOT, 'packages', SLUG, 'manifest.yaml'),
    ):
        if _os.path.exists(_mp):
            with open(_mp) as _mf:
                _in_demos = False
                for _line in _mf:
                    _stripped = _line.rstrip('\n')
                    if re.match(r'^demos:\s*$', _stripped):
                        _in_demos = True
                        continue
                    if _in_demos:
                        if re.match(r'^\S', _stripped):
                            break
                        _m = re.match(r'^\s+-\s+id:\s*[\"\']?([A-Za-z0-9_\-]+)', _stripped)
                        if _m:
                            demos.append(_m.group(1))
            break
    if not demos:
        demos = ['agentic-chat']
    _override = _json.dumps([{
        'name': f'showcase-{SLUG}',
        'publicUrl': f'http://{SLUG}:10000',
        'demos': demos,
    }])
    # Replace the entire folded-scalar LOCAL_SERVICES_JSON=[...] payload line.
    # docker-compose.local.yml writes it as:  '        LOCAL_SERVICES_JSON=[...]'
    content = re.sub(
        r'(^\s+)LOCAL_SERVICES_JSON=\[[^\n]*\]',
        lambda m: m.group(1) + 'LOCAL_SERVICES_JSON=' + _override,
        content,
        flags=re.MULTILINE,
    )

with open('$tmp_compose', 'w') as f:
    f.write(content)
"

  # Override shell variables so all downstream code uses the temp files.
  # Originals are NEVER mutated.
  COMPOSE_FILE="$tmp_compose"
  COMPOSE_CMD="docker compose -f $COMPOSE_FILE --project-name $name"