forked from CopilotKit/CopilotKit
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathentrypoint.sh
More file actions
executable file
·108 lines (96 loc) · 4.19 KB
/
Copy pathentrypoint.sh
File metadata and controls
executable file
·108 lines (96 loc) · 4.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/bin/bash
set -e

# cleanup: best-effort termination of the background processes this script
# launches (agent, Next.js, watchdog).
#
# Globals:   AGENT_PID, NEXTJS_PID, WATCHDOG_PID (read; each may be unset)
# Returns:   always 0
#
# The EXIT trap below is registered before any of the PID variables are
# assigned, so the function can run while some or all of them are still
# unset. Each PID is therefore expanded with an empty default and skipped
# when empty, instead of relying on unquoted word splitting to drop it.
cleanup() {
  local pid
  for pid in "${AGENT_PID:-}" "${NEXTJS_PID:-}" "${WATCHDOG_PID:-}"; do
    [ -n "$pid" ] || continue
    # The process may already be gone; that is not an error during teardown.
    kill "$pid" 2>/dev/null || true
  done
}
trap cleanup EXIT
# Turn off Python's stdout buffering for every child of this script, so the
# FastAPI/uvicorn agent emits log lines and tracebacks as they happen. With
# buffering on, output from a crash during module import can stay in the
# process's userspace buffer until exit, when the container is already gone.
export PYTHONUNBUFFERED=1

# Startup banner: package name, UTC time, and the PORT / NODE_ENV values the
# container was started with ("not set" when absent).
printf '%s\n' \
  "=========================================" \
  "[entrypoint] Starting showcase package: ms-agent-python" \
  "[entrypoint] Time: $(date -u)" \
  "[entrypoint] PORT=${PORT:-not set}" \
  "[entrypoint] NODE_ENV=${NODE_ENV:-not set}" \
  "========================================="

# Report whether the OpenAI key is present. Only its length is logged, never
# the value. A missing key produces a warning but does not stop startup.
if [ -n "$OPENAI_API_KEY" ]; then
  printf '%s\n' "[entrypoint] OPENAI_API_KEY: set (${#OPENAI_API_KEY} chars)"
else
  printf '%s\n' "[entrypoint] WARNING: OPENAI_API_KEY is not set! Agent will fail."
fi
# Launch the Python agent (uvicorn serving agent_server:app) on :8000 in the
# background. Its combined stdout/stderr is piped through awk, which prefixes
# every line with "[agent] " so it can be told apart from Next.js output in
# the Railway log stream.
#
# Log flushing is handled at two levels: the interpreter is started with
# `-u` (unbuffered stdio) in addition to any PYTHONUNBUFFERED setting in the
# environment, and awk calls fflush() after each line it prints, so lines are
# forwarded one at a time rather than accumulating in the pipe.
printf '%s\n' "[entrypoint] Starting Python agent on port 8000..."
python -u -m uvicorn agent_server:app --host 0.0.0.0 --port 8000 \
  &> >(awk '{print "[agent] " $0; fflush()}') &
AGENT_PID=$!

# Give the process a moment, then make sure it is still alive. An agent that
# died within the first two seconds aborts the whole entrypoint.
sleep 2
if ! kill -0 "$AGENT_PID" 2>/dev/null; then
  printf '%s\n' "[entrypoint] ERROR: Agent failed to start — exiting"
  exit 1
fi
printf '%s\n' "[entrypoint] Agent started (PID: $AGENT_PID)"
# Resolve the frontend port once (default 10000) so the banner and the
# `next start` invocation are guaranteed to use the same value.
PORT=${PORT:-10000}

echo "========================================="
echo "[entrypoint] Starting Next.js frontend on port ${PORT}..."
echo "========================================="

# Launch Next.js in the background with its output prefixed "[nextjs] ".
#
# NODE_ENV=production is applied through the `env` prefix so it is scoped to
# this one invocation. Setting it image-wide (`ENV NODE_ENV=production`)
# would leak it into every other child process (Python agent, shell,
# healthchecks).
#
# "$PORT" is quoted: the value comes from the container environment and must
# reach `next start` as exactly one argument.
env NODE_ENV=production npx next start --port "$PORT" \
  &> >(awk '{print "[nextjs] " $0; fflush()}') &
NEXTJS_PID=$!
echo "[entrypoint] Next.js started (PID: $NEXTJS_PID)"
# Watchdog for silent agent hangs.
#
# Railway deploys of showcase packages have been observed to end up with a
# Python process that is still alive but no longer answers on :8000. Because
# the process never exits, the main shell's `wait -n` never returns and the
# container is never restarted.
#
# This background subshell probes the agent's /health endpoint every 30s.
# A successful probe resets the failure counter. After 3 consecutive failed
# probes (about 90s of unreachable agent) it SIGKILLs the agent, which makes
# `wait -n` in the main shell return so the script exits and Railway can
# restart the container. Only the agent is killed here, not the entrypoint
# itself, so shutdown still goes through the main shell.
#
# Generalized from showcase/integrations/crewai-crews/entrypoint.sh
# (PRs #4114 + #4115).
(
  misses=0
  while sleep 30; do
    # Agent already gone: the main shell's `wait -n` deals with it.
    kill -0 "$AGENT_PID" 2>/dev/null || break

    if ! curl -fsS --max-time 5 http://127.0.0.1:8000/health > /dev/null 2>&1; then
      misses=$((misses + 1))
      echo "[watchdog] Agent health probe failed (count=$misses)"
      if [ "$misses" -ge 3 ]; then
        echo "[watchdog] Agent unresponsive for ~90s — killing PID $AGENT_PID to trigger container restart"
        kill -9 "$AGENT_PID" 2>/dev/null || true
        break
      fi
    else
      misses=0
    fi
  done
) &
WATCHDOG_PID=$!
echo "[entrypoint] Watchdog started (PID: $WATCHDOG_PID)"
echo "[entrypoint] All processes running. Waiting..."

# Block until either the agent or Next.js terminates, then exit with that
# process's status so the platform sees the failure and restarts the
# container.
#
# The status is captured with `|| EXIT_CODE=$?` rather than a bare `wait`
# followed by `$?`: this script runs under `set -e`, so a bare `wait -n`
# returning non-zero (a crashed process, or the agent SIGKILLed by the
# watchdog) would abort the script on that line and skip the diagnostic
# below.
EXIT_CODE=0
wait -n "$AGENT_PID" "$NEXTJS_PID" || EXIT_CODE=$?

# Work out which process went away by probing each PID; fall back to a
# generic message if both still appear to be alive.
if ! kill -0 "$AGENT_PID" 2>/dev/null; then
  echo "[entrypoint] Agent (PID: $AGENT_PID) exited with code $EXIT_CODE"
elif ! kill -0 "$NEXTJS_PID" 2>/dev/null; then
  echo "[entrypoint] Next.js (PID: $NEXTJS_PID) exited with code $EXIT_CODE"
else
  echo "[entrypoint] A process exited with code $EXIT_CODE"
fi

# The EXIT trap terminates whichever processes are still running.
exit "$EXIT_CODE"