Skip to content

Commit 5088dfe

Browse files
committed
feat: Implement streaming chat completions
1 parent f9dccc0 commit 5088dfe

2 files changed

Lines changed: 91 additions & 77 deletions

File tree

src/routes/chat-completions/route.ts

Lines changed: 33 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -4,90 +4,46 @@ import { streamSSE } from "hono/streaming"
44
import { FetchError } from "ofetch"
55

66
import type { ChatCompletionsPayload } from "~/services/copilot/chat-completions/types"
7-
import type { ChatCompletionsChunk } from "~/services/copilot/chat-completions/types.streaming"
87

98
import { chatCompletions } from "~/services/copilot/chat-completions/service"
109

10+
import { createContentChunk, createFinalChunk, segmentResponse } from "./utils"
11+
1112
export const chatCompletionsRoutes = new Hono()
1213

1314
// POST /chat/completions handler, shown here as a two-sided diff: a line that
// follows an `N-` marker belongs to the removed implementation, a line that
// follows an `N+` marker belongs to the added one.
// Added side, in short: parse the JSON payload, call `chatCompletions` with
// `stream` forced to false, turn the completed answer into chunks, and write
// those chunks to the client through `streamSSE`.
chatCompletionsRoutes.post("/chat/completions", async (c) => {
14-
const payload = await c.req.json<ChatCompletionsPayload>()
15-
16-
payload.stream = false
17-
18-
consola.info(`Received request: ${JSON.stringify(payload).slice(0, 500)}`)
19-
20-
const response = await chatCompletions(payload).catch((error: unknown) => {
15+
// Added side: the whole handler body now sits in one try/catch, replacing the
// `.catch()` that the removed side attached to the `chatCompletions` call.
try {
16+
const payload = await c.req.json<ChatCompletionsPayload>()
17+
// The upstream request is always made non-streamed; the event stream sent to
// the client is assembled locally further down.
payload.stream = false
18+
19+
// Only the first 500 characters of the serialized payload are logged.
consola.info(`Received request: ${JSON.stringify(payload).slice(0, 500)}`)
20+
21+
const response = await chatCompletions(payload)
22+
consola.info(`Response from Copilot: ${JSON.stringify(response)}`)
23+
24+
// Only the first choice's message content is segmented.
// NOTE(review): this dereference throws if `choices` is empty — confirm the
// upstream service always returns at least one choice.
const segments = segmentResponse(response.choices[0].message.content)
25+
const chunks = segments.map((segment) =>
26+
createContentChunk(segment, response, payload.model),
27+
)
28+
29+
// The last chunk in the list is the one produced by createFinalChunk.
chunks.push(createFinalChunk(response, payload.model))
30+
31+
consola.info(
32+
`Streaming response, first chunk: ${JSON.stringify(chunks.at(0))}`,
33+
)
34+
35+
// Each prepared chunk is written as one SSE event whose data is the
// JSON-serialized `chunk.data`, followed by a `stream.sleep(1)` call.
return streamSSE(c, async (stream) => {
36+
for (const chunk of chunks) {
37+
await stream.writeSSE({
38+
data: JSON.stringify(chunk.data),
39+
})
40+
await stream.sleep(1) // Simulated latency
41+
}
42+
})
43+
// A FetchError is logged (message plus `response?._data`); every error,
// FetchError or not, is then rethrown unchanged.
} catch (error) {
2144
if (error instanceof FetchError) {
22-
consola.error(
23-
// eslint-disable-next-line @typescript-eslint/no-base-to-string, @typescript-eslint/restrict-template-expressions
24-
`Request failed: ${JSON.stringify(payload)} \n ${error} \n ${error.response?._data}`,
25-
)
45+
consola.error(`Request failed: ${error.message}`, error.response?._data)
2646
}
27-
2847
throw error
29-
})
30-
31-
consola.info(`Response from Copilot: ${JSON.stringify(response)}`)
32-
33-
const segmenter = new Intl.Segmenter("en", { granularity: "word" })
34-
35-
const segmentedMessages = segmenter.segment(
36-
response.choices[0].message.content,
37-
)
38-
39-
const chunks: Array<ChatCompletionsChunk> = Array.from(segmentedMessages).map(
40-
(segment) => ({
41-
data: {
42-
object: "chat.completion.chunk",
43-
choices: [
44-
{
45-
delta: {
46-
content: segment.segment,
47-
role: response.choices[0].message.role,
48-
},
49-
index: 0,
50-
},
51-
],
52-
created: response.created,
53-
id: response.id,
54-
model: payload.model,
55-
usage: null,
56-
system_fingerprint: "fp_44709d6fcb",
57-
},
58-
}),
59-
)
60-
61-
chunks.push({
62-
data: {
63-
object: "chat.completion.chunk",
64-
choices: [
65-
{
66-
delta: {},
67-
finish_reason: response.choices[0].finish_reason,
68-
index: 0,
69-
},
70-
],
71-
created: response.created,
72-
id: response.id,
73-
model: payload.model,
74-
usage: response.usage,
75-
system_fingerprint: "fp_44709d6fcb",
76-
},
77-
})
78-
79-
consola.info(
80-
`Streaming response, first chunk: ${JSON.stringify(chunks.at(0))}`,
81-
)
82-
83-
return streamSSE(c, async (stream) => {
84-
for (const chunk of chunks) {
85-
await stream.writeSSE({
86-
data: JSON.stringify(chunk.data),
87-
})
88-
89-
// Fake latency lol
90-
await stream.sleep(1)
91-
}
92-
})
48+
}
9349
})
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
import type { ChatCompletionResponse } from "~/services/copilot/chat-completions/types"
2+
import type { ChatCompletionsChunk } from "~/services/copilot/chat-completions/types.streaming"
3+
4+
/**
 * Builds one `chat.completion.chunk` payload carrying a piece of message text.
 *
 * The chunk copies `id` and `created` from the completed response and the role
 * of the response's first choice; `usage` is always `null` on content chunks.
 *
 * @param segment - Text placed in the chunk's `delta.content`.
 * @param response - Completed response the chunk metadata is copied from.
 * @param model - Model name reported in the chunk.
 * @returns The chunk wrapped in a `data` property.
 * @throws Error when `response.choices` has no first entry.
 */
export function createContentChunk(
  segment: string,
  response: ChatCompletionResponse,
  model: string,
): ChatCompletionsChunk {
  const firstChoice = response.choices[0]
  // Fail with a descriptive message instead of an opaque
  // "cannot read properties of undefined" TypeError.
  if (!firstChoice) {
    throw new Error("Cannot build content chunk: response contains no choices")
  }

  return {
    data: {
      object: "chat.completion.chunk",
      choices: [
        {
          delta: {
            content: segment,
            role: firstChoice.message.role,
          },
          index: 0,
        },
      ],
      created: response.created,
      id: response.id,
      model,
      usage: null,
      // Hard-coded value; it is not read from the response.
      system_fingerprint: "fp_44709d6fcb",
    },
  }
}
29+
30+
/**
 * Builds the terminating `chat.completion.chunk` payload of a stream.
 *
 * The chunk has an empty `delta`, the `finish_reason` of the response's first
 * choice, and the response's `usage`; `id` and `created` are copied from the
 * response as well.
 *
 * @param response - Completed response the chunk metadata is copied from.
 * @param model - Model name reported in the chunk.
 * @returns The chunk wrapped in a `data` property.
 * @throws Error when `response.choices` has no first entry.
 */
export function createFinalChunk(
  response: ChatCompletionResponse,
  model: string,
): ChatCompletionsChunk {
  const firstChoice = response.choices[0]
  // Fail with a descriptive message instead of an opaque
  // "cannot read properties of undefined" TypeError.
  if (!firstChoice) {
    throw new Error("Cannot build final chunk: response contains no choices")
  }

  return {
    data: {
      object: "chat.completion.chunk",
      choices: [
        {
          delta: {},
          finish_reason: firstChoice.finish_reason,
          index: 0,
        },
      ],
      created: response.created,
      id: response.id,
      model,
      usage: response.usage,
      // Hard-coded value; it is not read from the response.
      system_fingerprint: "fp_44709d6fcb",
    },
  }
}
52+
53+
/**
 * Splits message text into word-granularity segments using `Intl.Segmenter`
 * with the "en" locale. Whitespace and punctuation come back as their own
 * segments, so joining the result reproduces the input exactly.
 *
 * @param content - Message text; `null` or `undefined` is treated as "no text".
 * @returns The segments in order, or an empty array when there is no text.
 */
export function segmentResponse(
  content: string | null | undefined,
): Array<string> {
  // `Intl.Segmenter.prototype.segment` coerces its argument to a string, so a
  // missing message body would otherwise be split into the literal text
  // "null" / "undefined" and streamed to the client.
  if (content == null) {
    return []
  }

  const segmenter = new Intl.Segmenter("en", { granularity: "word" })
  return Array.from(segmenter.segment(content), (part) => part.segment)
}

0 commit comments

Comments
 (0)