forked from CopilotKit/CopilotKit
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprobe-docs.ts
More file actions
173 lines (157 loc) · 6.73 KB
/
Copy pathprobe-docs.ts
File metadata and controls
173 lines (157 loc) · 6.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
// Docs-link probe.
//
// Reads shared/feature-registry.json; for each feature:
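// - og_docs_url → HTTP GET (redirects followed). 404 or a soft-404 page =
//   "notfound"; any other status outside 200–399, or a network
//   failure/timeout = "error"; otherwise "ok".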
// - shell_docs_path (relative path like "/docs/features/agentic-chat";
// falls back to legacy `shell_docs_url` if present)
// → check shell-docs/src/content/docs/<path>.mdx
// (or index.mdx). File exists = "ok", else "notfound".
// No network.
//
// `shell_docs_path` is the preferred key (matches the schema in
// `scripts/generate-registry.ts` + per-package `docs-links.json`). The legacy
// `shell_docs_url` alias is retained for backward compatibility with older
// `shared/feature-registry.json` snapshots; if only the legacy key is present
// we emit a one-shot notice in dev so the stale shape doesn't go unnoticed.
//
// Writes shell-dashboard/src/data/docs-status.json. The shell-dashboard UI reads it
// so green ✓ / red ✗ reflect actual reachability, not just "field present."
//
// Intended to run on `pnpm dev` (via predev hook) and CI. Safe to run
// frequently — one GET per feature (10s timeout each) and the file list is ~50.
import fs from "fs";
import path from "path";
import { fileURLToPath } from "url";
// ESM has no built-in __filename/__dirname, so derive them from this
// module's URL. ROOT is the parent of the directory containing this script.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const ROOT = path.join(__dirname, "..");

// Builds an absolute path from segments relative to ROOT.
const underRoot = (...segments: string[]): string =>
  path.join(ROOT, ...segments);

// Input: the feature registry read by main().
const REGISTRY_PATH = underRoot("shared", "feature-registry.json");

// MDX docs content lives in shell-docs (it owns the docs hostname), so that
// is the tree scanned for each feature's `shell_docs_path` (or the legacy
// `shell_docs_url`) value.
const SHELL_DOCS_ROOT = underRoot("shell-docs", "src", "content", "docs");

// Output: docs-status.json is consumed only by shell-dashboard, so it is
// emitted directly into shell-dashboard/src/data/ for the dashboard to read.
const OUTPUT_PATH = underRoot(
  "shell-dashboard",
  "src",
  "data",
  "docs-status.json",
);
/**
 * Outcome of probing a single docs link.
 * - "ok": the link resolved (HTTP probe passed, or a matching .mdx file exists).
 * - "missing": the feature has no URL/path configured for this link.
 * - "notfound": HTTP 404, a soft-404 page, or no matching .mdx file on disk.
 * - "error": any other failing HTTP status, or the request itself failed.
 */
type DocState = "ok" | "missing" | "notfound" | "error";
/** The fields of a feature-registry entry that this probe reads. */
interface Feature {
/** Key under which this feature's result is written to docs-status.json. */
id: string;
/** URL probed over HTTP; when absent the "og" state is "missing". */
og_docs_url?: string;
/** Preferred docs path (e.g. "/docs/features/agentic-chat"), checked on disk. */
shell_docs_path?: string;
/** @deprecated use `shell_docs_path`; retained for backward compat */
shell_docs_url?: string;
}
// Tracks whether the legacy-key notice has already been printed, so it is
// shown at most once per process no matter how many features trigger it.
let legacyKeyNoticeEmitted = false;

/**
 * Warns that a registry entry still uses the legacy `shell_docs_url` key.
 *
 * The warning is printed only for the first feature that triggers it, and
 * is suppressed entirely when NODE_ENV is "production". Any other
 * NODE_ENV value (including unset) lets the warning through.
 *
 * @param featureId id of the feature that carried only the legacy key.
 */
function noteLegacyShellDocsKey(featureId: string): void {
  const suppressed =
    legacyKeyNoticeEmitted || process.env.NODE_ENV === "production";
  if (!suppressed) {
    legacyKeyNoticeEmitted = true;
    const message = [
      `[probe-docs] note: feature "${featureId}" (and possibly others) uses legacy "shell_docs_url" key; `,
      `prefer "shell_docs_path" to match the canonical schema in generate-registry.ts`,
    ].join("");
    console.warn(message);
  }
}
/** Probe results for one feature, stored under its id in docs-status.json. */
interface FeatureDocStatus {
/** State of the `og_docs_url` HTTP probe. */
og: DocState;
/** State of the shell-docs on-disk lookup. */
shell: DocState;
}
// Soft-404 detection. docs.copilotkit.ai returns HTTP 200 with a
// client-rendered "Page Not Found" view for missing docs. Two signals:
//   (a) Next.js header "x-matched-path" naming a catch-all route
//       ("/[...slug]" or "/[[...slug]]") → catch-all fallback
//   (b) `<meta name="robots" content="...noindex...">` in body → page asks
//       not to be indexed, which docs sites only do for 404s and drafts.
// Either signal is treated as notfound.
// NOTE: the pattern expects `name` directly followed by `content`; a tag
// that lists the attributes in another order is not matched.
const NOINDEX_PATTERN =
  /<meta\s+name=["']robots["']\s+content=["'][^"']*noindex[^"']*["']/i;

/**
 * Probes a docs URL with a GET request (following redirects).
 *
 * @param url URL to probe; a falsy value means the feature has no link.
 * @returns "missing" when no URL is given; "notfound" for HTTP 404 or a
 *   soft-404 page; "error" for any other status outside 200–399 or when the
 *   request fails (including the 10s timeout); otherwise "ok".
 *   Never rejects: failures are logged and reported as "error".
 */
async function probeOg(url: string | undefined): Promise<DocState> {
  if (!url) return "missing";
  // Hard timeout: without it, a hung upstream would stall the whole probe
  // run indefinitely (no default fetch timeout in Node). 10s is generous
  // enough for slow docs sites while still bounding CI cost.
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), 10_000);
  let res: Awaited<ReturnType<typeof fetch>> | undefined;
  try {
    res = await fetch(url, {
      method: "GET",
      redirect: "follow",
      signal: controller.signal,
    });
    if (res.status === 404) return "notfound";
    if (res.status < 200 || res.status >= 400) return "error";
    const matched = res.headers.get("x-matched-path") ?? "";
    // "[..." is a substring of "[[...", so one check covers both the
    // required and the optional catch-all route forms.
    if (matched.includes("[...")) return "notfound";
    // The timer stays armed while the body downloads, so a stalled body is
    // aborted too and lands in the catch below.
    const body = await res.text();
    return NOINDEX_PATTERN.test(body) ? "notfound" : "ok";
  } catch (err) {
    // Log the URL + kind so a spike of "error" states can be diagnosed
    // (abort vs. DNS vs. TLS). Non-Error rejections are wrapped so that
    // building the log line can never throw and reject the caller.
    const e = (err instanceof Error ? err : new Error(String(err))) as Error & {
      code?: string;
      cause?: { code?: string };
    };
    const code = e.code ?? e.cause?.code ?? "";
    const detail = code ? `${e.name}:${code}` : e.name;
    console.warn(
      `[probe-docs] probeOg failed ${url} (${detail}): ${e.message}`,
    );
    return "error";
  } finally {
    clearTimeout(timer);
    // Early returns above never read the body. Cancel it (best-effort) so
    // the underlying connection is released instead of waiting on GC.
    if (res && !res.bodyUsed) {
      void res.body?.cancel().catch(() => undefined);
    }
  }
}
/**
 * Checks on disk whether a docs path has a matching MDX file.
 *
 * A leading "/docs" (with or without its trailing slash) and one trailing
 * "/" are removed from the path; the remainder is looked up under
 * SHELL_DOCS_ROOT as either `<rest>.mdx` or `<rest>/index.mdx`.
 *
 * @param docsPath docs path such as "/docs/features/agentic-chat".
 * @returns "missing" when no path is given, "ok" when either candidate
 *   exists, otherwise "notfound". No network access.
 */
function probeShell(docsPath: string | undefined): DocState {
  if (!docsPath) {
    return "missing";
  }
  const withoutPrefix = docsPath.replace(/^\/docs\/?/, "");
  const slug = withoutPrefix.replace(/\/$/, "");
  const flatFile = path.join(SHELL_DOCS_ROOT, `${slug}.mdx`);
  const indexFile = path.join(SHELL_DOCS_ROOT, slug, "index.mdx");
  for (const candidate of [flatFile, indexFile]) {
    if (fs.existsSync(candidate)) {
      return "ok";
    }
  }
  return "notfound";
}
/**
 * Probes both docs links of every registry feature and writes the results
 * to OUTPUT_PATH as `{ features: { [id]: { og, shell } } }`, then prints
 * aggregate counts.
 *
 * @throws Error when the registry JSON has no top-level `features` array.
 *   File-system and JSON syntax errors propagate unchanged.
 */
async function main(): Promise<void> {
  const raw = fs.readFileSync(REGISTRY_PATH, "utf-8");
  // JSON.parse output is unchecked. Verify the one structural assumption
  // the loop below relies on, so a malformed registry fails with a clear
  // message rather than an opaque "cannot read properties of undefined".
  const parsed: unknown = JSON.parse(raw);
  const maybeFeatures =
    typeof parsed === "object" && parsed !== null
      ? (parsed as { features?: unknown }).features
      : undefined;
  if (!Array.isArray(maybeFeatures)) {
    throw new Error(
      `[probe-docs] ${REGISTRY_PATH} must contain a top-level "features" array`,
    );
  }
  const features = maybeFeatures as Feature[];
  const results: Record<string, FeatureDocStatus> = {};
  // Probe OG URLs in parallel; shell check is sync filesystem.
  // Prefer `shell_docs_path` (canonical) and fall back to the legacy
  // `shell_docs_url` key.
  const entries = await Promise.all(
    features.map(async (f) => {
      const og = await probeOg(f.og_docs_url);
      const shellPath = f.shell_docs_path ?? f.shell_docs_url;
      // `??` falls through on JSON null as well as on a missing key, so
      // use `== null` here to keep the notice in step with the fallback.
      if (f.shell_docs_path == null && f.shell_docs_url != null) {
        noteLegacyShellDocsKey(f.id);
      }
      const shell = probeShell(shellPath);
      return [f.id, { og, shell }] as const;
    }),
  );
  for (const [id, status] of entries) results[id] = status;
  fs.mkdirSync(path.dirname(OUTPUT_PATH), { recursive: true });
  fs.writeFileSync(OUTPUT_PATH, JSON.stringify({ features: results }, null, 2));
  // Per-feature summary is noisy; print aggregate counts.
  const counts: Record<DocState, number> = {
    ok: 0,
    missing: 0,
    notfound: 0,
    error: 0,
  };
  for (const s of Object.values(results)) {
    counts[s.og]++;
    counts[s.shell]++;
  }
  console.log(
    `Wrote ${OUTPUT_PATH} (${features.length} features × 2 links)`,
  );
  console.log(
    ` ok=${counts.ok} notfound=${counts.notfound} error=${counts.error} missing=${counts.missing}`,
  );
}
// Script entry point: a rejection from main() is logged with the
// "[probe-docs] fatal:" prefix and the process exits with status 1.
function reportFatal(err: unknown): never {
  console.error("[probe-docs] fatal:", err);
  process.exit(1);
}

main().catch(reportFatal);