Skip to content

Commit e430646

Browse files
committed
Track usage in OpenAI and Claude models
1 parent bcb0b57 commit e430646

7 files changed

Lines changed: 155 additions & 22 deletions

Tool/Sources/OpenAIService/APIs/BuiltinExtensionChatCompletionsService.swift

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,8 @@ extension BuiltinExtensionChatCompletionsService: ChatCompletionsAPI {
5151
model: model,
5252
message: .init(role: .assistant, content: content),
5353
otherChoices: [],
54-
finishReason: ""
54+
finishReason: "",
55+
usage: nil
5556
)
5657
}
5758
}

Tool/Sources/OpenAIService/APIs/ChatCompletionsAPIDefinition.swift

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,12 +182,21 @@ struct ChatCompletionsStreamDataChunk {
182182
var content: String?
183183
var toolCalls: [ToolCall]?
184184
}
185+
186+
struct Usage: Codable, Equatable {
187+
var promptTokens: Int?
188+
var completionTokens: Int?
189+
190+
var cachedTokens: Int?
191+
var otherUsage: [String: Int]
192+
}
185193

186194
var id: String?
187195
var object: String?
188196
var model: String?
189197
var message: Delta?
190198
var finishReason: String?
199+
var usage: Usage?
191200
}
192201

193202
// MARK: - Non Stream API
@@ -198,12 +207,39 @@ protocol ChatCompletionsAPI {
198207

199208
struct ChatCompletionResponseBody: Codable, Equatable {
200209
typealias Message = ChatCompletionsRequestBody.Message
210+
211+
struct Usage: Codable, Equatable {
212+
var promptTokens: Int
213+
var completionTokens: Int
214+
215+
var cachedTokens: Int
216+
var otherUsage: [String: Int]
217+
218+
mutating func merge(with other: ChatCompletionsStreamDataChunk.Usage) {
219+
promptTokens += other.promptTokens ?? 0
220+
completionTokens += other.completionTokens ?? 0
221+
cachedTokens += other.cachedTokens ?? 0
222+
for (key, value) in other.otherUsage {
223+
otherUsage[key, default: 0] += value
224+
}
225+
}
226+
227+
mutating func merge(with other: Self) {
228+
promptTokens += other.promptTokens
229+
completionTokens += other.completionTokens
230+
cachedTokens += other.cachedTokens
231+
for (key, value) in other.otherUsage {
232+
otherUsage[key, default: 0] += value
233+
}
234+
}
235+
}
201236

202237
var id: String?
203238
var object: String
204239
var model: String
205240
var message: Message
206241
var otherChoices: [Message]
207242
var finishReason: String
243+
var usage: Usage?
208244
}
209245

Tool/Sources/OpenAIService/APIs/ClaudeChatCompletionsService.swift

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import AIModel
2-
import ChatBasic
32
import AsyncAlgorithms
3+
import ChatBasic
44
import CodableWrappers
55
import Foundation
66
import Logger
@@ -57,6 +57,7 @@ public actor ClaudeChatCompletionsService: ChatCompletionsStreamAPI, ChatComplet
5757
var content_block: ContentBlock?
5858
var delta: Delta?
5959
var error: APIError?
60+
var usage: ResponseBody.Usage?
6061

6162
struct Message: Decodable {
6263
var id: String
@@ -66,7 +67,7 @@ public actor ClaudeChatCompletionsService: ChatCompletionsStreamAPI, ChatComplet
6667
var model: String
6768
var stop_reason: String?
6869
var stop_sequence: String?
69-
var usage: Usage?
70+
var usage: ResponseBody.Usage?
7071
}
7172

7273
struct ContentBlock: Decodable {
@@ -75,16 +76,10 @@ public actor ClaudeChatCompletionsService: ChatCompletionsStreamAPI, ChatComplet
7576
}
7677

7778
struct Delta: Decodable {
78-
var type: String
79+
var type: String?
7980
var text: String?
8081
var stop_reason: String?
8182
var stop_sequence: String?
82-
var usage: Usage?
83-
}
84-
85-
struct Usage: Decodable {
86-
var input_tokens: Int?
87-
var output_tokens: Int?
8883
}
8984
}
9085

@@ -112,6 +107,8 @@ public actor ClaudeChatCompletionsService: ChatCompletionsStreamAPI, ChatComplet
112107
struct Usage: Codable, Equatable {
113108
var input_tokens: Int?
114109
var output_tokens: Int?
110+
var cache_creation_input_tokens: Int?
111+
var cache_read_input_tokens: Int?
115112
}
116113

117114
var id: String?
@@ -184,7 +181,7 @@ public actor ClaudeChatCompletionsService: ChatCompletionsStreamAPI, ChatComplet
184181
var text: String
185182
var cache_control: CacheControl?
186183
}
187-
184+
188185
struct Tool: Encodable, Equatable {
189186
var name: String
190187
var description: String
@@ -325,13 +322,26 @@ extension ClaudeChatCompletionsService.ResponseBody {
325322
}
326323
),
327324
otherChoices: [],
328-
finishReason: stop_reason ?? ""
325+
finishReason: stop_reason ?? "",
326+
usage: .init(
327+
promptTokens: usage.input_tokens ?? 0,
328+
completionTokens: usage.output_tokens ?? 0,
329+
cachedTokens: usage.cache_read_input_tokens ?? 0,
330+
otherUsage: {
331+
var otherUsage = [String: Int]()
332+
if let cacheCreation = usage.cache_creation_input_tokens {
333+
otherUsage["cache_creation_input_tokens"] = cacheCreation
334+
}
335+
return otherUsage
336+
}()
337+
)
329338
)
330339
}
331340
}
332341

333342
extension ClaudeChatCompletionsService.StreamDataChunk {
334343
func formalized() -> ChatCompletionsStreamDataChunk {
344+
let usage = usage ?? message?.usage
335345
return .init(
336346
id: message?.id,
337347
object: "chat.completions",
@@ -345,7 +355,19 @@ extension ClaudeChatCompletionsService.StreamDataChunk {
345355
}
346356
return nil
347357
}(),
348-
finishReason: delta?.stop_reason
358+
finishReason: delta?.stop_reason,
359+
usage: .init(
360+
promptTokens: usage?.input_tokens ,
361+
completionTokens: usage?.output_tokens,
362+
cachedTokens: usage?.cache_read_input_tokens,
363+
otherUsage: {
364+
var otherUsage = [String: Int]()
365+
if let cacheCreation = usage?.cache_creation_input_tokens {
366+
otherUsage["cache_creation_input_tokens"] = cacheCreation
367+
}
368+
return otherUsage
369+
}()
370+
)
349371
)
350372
}
351373
}

Tool/Sources/OpenAIService/APIs/GoogleAIChatCompletionsService.swift

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,7 @@ extension ModelContent {
227227
case .assistant:
228228
if let toolCalls = message.toolCalls {
229229
return toolCalls.map { call in
230-
return """
230+
"""
231231
Function ID: \(call.id)
232232
Call function: \(call.function.name)
233233
Arguments: \(call.function.arguments ?? "{}")
@@ -277,7 +277,8 @@ extension GenerateContentResponse {
277277
model: "",
278278
message: message,
279279
otherChoices: otherMessages,
280-
finishReason: candidates.first?.finishReason?.rawValue ?? ""
280+
finishReason: candidates.first?.finishReason?.rawValue ?? "",
281+
usage: nil
281282
)
282283
}
283284

Tool/Sources/OpenAIService/APIs/OlamaChatCompletionsService.swift

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,8 @@ extension OllamaChatCompletionsService: ChatCompletionsAPI {
9494
)
9595
} ?? .init(role: .assistant, content: ""),
9696
otherChoices: [],
97-
finishReason: ""
97+
finishReason: "",
98+
usage: nil
9899
)
99100
}
100101
}

Tool/Sources/OpenAIService/APIs/OpenAIChatCompletionsService.swift

Lines changed: 64 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ actor OpenAIChatCompletionsService: ChatCompletionsStreamAPI, ChatCompletionsAPI
9595
var object: String?
9696
var model: String?
9797
var choices: [Choice]?
98+
var usage: ResponseBody.Usage?
9899

99100
struct Choice: Codable {
100101
var delta: Delta?
@@ -143,6 +144,18 @@ actor OpenAIChatCompletionsService: ChatCompletionsStreamAPI, ChatCompletionsAPI
143144
var prompt_tokens: Int?
144145
var completion_tokens: Int?
145146
var total_tokens: Int?
147+
var prompt_tokens_details: PromptTokensDetails?
148+
var completion_tokens_details: CompletionTokensDetails?
149+
150+
struct PromptTokensDetails: Codable, Equatable {
151+
var cached_tokens: Int?
152+
var audio_tokens: Int?
153+
}
154+
155+
struct CompletionTokensDetails: Codable, Equatable {
156+
var reasoning_tokens: Int?
157+
var audio_tokens: Int?
158+
}
146159
}
147160

148161
var id: String?
@@ -198,14 +211,19 @@ actor OpenAIChatCompletionsService: ChatCompletionsStreamAPI, ChatCompletionsAPI
198211
var function: ChatGPTFunctionSchema
199212
}
200213

214+
struct StreamOptions: Codable, Equatable {
215+
var include_usage: Bool = true
216+
}
217+
201218
var model: String
202219
var messages: [Message]
203220
var temperature: Double?
204221
var stream: Bool?
205222
var stop: [String]?
206-
var max_tokens: Int?
223+
var max_completion_tokens: Int?
207224
var tool_choice: FunctionCallStrategy?
208225
var tools: [Tool]?
226+
var stream_options: StreamOptions?
209227
}
210228

211229
var apiKey: String
@@ -295,7 +313,13 @@ actor OpenAIChatCompletionsService: ChatCompletionsStreamAPI, ChatCompletionsAPI
295313
model: "",
296314
message: .init(role: .assistant, content: ""),
297315
otherChoices: [],
298-
finishReason: ""
316+
finishReason: "",
317+
usage: .init(
318+
promptTokens: 0,
319+
completionTokens: 0,
320+
cachedTokens: 0,
321+
otherUsage: [:]
322+
)
299323
)
300324
for try await chunk in stream {
301325
if let id = chunk.id {
@@ -316,6 +340,9 @@ actor OpenAIChatCompletionsService: ChatCompletionsStreamAPI, ChatCompletionsAPI
316340
if let text = chunk.message?.content {
317341
body.message.content += text
318342
}
343+
if let usage = chunk.usage {
344+
body.usage?.merge(with: usage)
345+
}
319346
}
320347
return body
321348
}
@@ -357,7 +384,7 @@ actor OpenAIChatCompletionsService: ChatCompletionsStreamAPI, ChatCompletionsAPI
357384
forHTTPHeaderField: "OpenAI-Project"
358385
)
359386
}
360-
387+
361388
request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization")
362389
case .openAICompatible:
363390
request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization")
@@ -420,14 +447,25 @@ extension OpenAIChatCompletionsService.ResponseBody {
420447
message = .init(role: .assistant, content: "")
421448
otherMessages = []
422449
}
450+
451+
let usage = ChatCompletionResponseBody.Usage(
452+
promptTokens: usage.prompt_tokens ?? 0,
453+
completionTokens: usage.completion_tokens ?? 0,
454+
cachedTokens: usage.prompt_tokens_details?.cached_tokens ?? 0,
455+
otherUsage: [
456+
"audio_tokens": usage.completion_tokens_details?.audio_tokens ?? 0,
457+
"reasoning_tokens": usage.completion_tokens_details?.reasoning_tokens ?? 0,
458+
]
459+
)
423460

424461
return .init(
425462
id: id,
426463
object: object,
427464
model: model,
428465
message: message,
429466
otherChoices: otherMessages,
430-
finishReason: choices.first?.finish_reason ?? ""
467+
finishReason: choices.first?.finish_reason ?? "",
468+
usage: usage
431469
)
432470
}
433471
}
@@ -478,7 +516,22 @@ extension OpenAIChatCompletionsService.StreamDataChunk {
478516
}
479517
return nil
480518
}(),
481-
finishReason: choices?.first?.finish_reason
519+
finishReason: choices?.first?.finish_reason,
520+
usage: .init(
521+
promptTokens: usage?.prompt_tokens,
522+
completionTokens: usage?.completion_tokens,
523+
cachedTokens: usage?.prompt_tokens_details?.cached_tokens,
524+
otherUsage: {
525+
var dict = [String: Int]()
526+
if let audioTokens = usage?.completion_tokens_details?.audio_tokens {
527+
dict["audio_tokens"] = audioTokens
528+
}
529+
if let reasoningTokens = usage?.completion_tokens_details?.reasoning_tokens {
530+
dict["reasoning_tokens"] = reasoningTokens
531+
}
532+
return dict
533+
}()
534+
)
482535
)
483536
}
484537
}
@@ -576,14 +629,19 @@ extension OpenAIChatCompletionsService.RequestBody {
576629
temperature = body.temperature
577630
stream = body.stream
578631
stop = body.stop
579-
max_tokens = body.maxTokens
632+
max_completion_tokens = body.maxTokens
580633
tool_choice = body.toolChoice
581634
tools = body.tools?.map {
582635
Tool(
583636
type: $0.type,
584637
function: $0.function
585638
)
586639
}
640+
stream_options = if body.stream ?? false {
641+
StreamOptions()
642+
} else {
643+
nil
644+
}
587645
}
588646
}
589647

0 commit comments

Comments (0)