11import Foundation
2+ import LangChain
23import OpenAIService
34import Preferences
45
56struct QueryWebsiteFunction : ChatGPTFunction {
/// Arguments supplied when the function is invoked.
struct Arguments: Codable {
    /// What the caller wants to find out from the websites.
    var query: String

    /// The websites to consult, each given as a URL string.
    var urls: [String]
}
10-
11+
/// Outcome of a website query: the documents selected as relevant to the question.
struct Result: ChatGPTFunctionResult {
    /// Documents picked by the search; may be empty.
    var relevantDocuments: [Document]

    /// Text form of the result: the page content of every document, joined by a
    /// separator, or a fixed notice when there are no documents.
    var botReadableContent: String {
        // TODO: overlapping text between documents is not removed yet.
        guard !relevantDocuments.isEmpty else {
            return " No relevant information found "
        }
        let passages = relevantDocuments.map { $0.pageContent }
        return passages.joined(separator: " \n \n ")
    }
}
19-
23+
/// Callback that receives progress messages while the function works.
/// The default implementation ignores the message.
var reportProgress: (String) async -> Void = { (_: String) in }
25+
/// The name reported for this function.
var name: String {
    let identifier = " queryWebsite "
    return identifier
}
23-
29+
/// A one-sentence summary of when this function is useful.
var description: String {
    let summary = " Useful for when you need to answer a question using information from a website. "
    return summary
}
27-
33+
/// Schema describing the arguments of this function: an object whose required
/// keys are the query and the list of urls, the latter being an array of strings.
// NOTE(review): this listing is a diff rendering. Lines marked `-` are the removed
// single-url entry; lines marked `+` are its array-valued replacement. The hunk
// header below hides two lines of the literal (presumably the properties container
// and the query key) - confirm against the full file before editing.
2834 var argumentSchema : JSONSchemaValue {
2935 return [
3036 . type: " object " ,
@@ -33,26 +39,88 @@ struct QueryWebsiteFunction: ChatGPTFunction {
3339 . type: " string " ,
3440 . description: " things you want to know about the website " ,
3541 ] ,
36- " urlString " : [
37- . type: " string " ,
38- . description: " the url of the website "
39- ]
42+ " urls " : [
43+ . type: " array " ,
44+ . description: " urls of the website, you can use urls appearing in the conversation " ,
45+ . items: [
46+ . type: " string " ,
47+ ] ,
48+ ] ,
4049 ] ,
41- . required: [ " query " , " urlString " ]
50+ . required: [ " query " , " urls " ] ,
4251 ]
4352 }
44-
45- func message ( at phase : OpenAIService . ChatGPTFunctionCallPhase ) -> String {
46- return " "
53+
/// Reports an initial progress message through `reportProgress`.
func prepare() async {
    let status = " Reading.. "
    await reportProgress(status)
}
48-
57+
/// Answers `arguments.query` using the websites listed in `arguments.urls`.
///
/// The query is embedded once. Every distinct URL string that parses as a `URL` is
/// handled in its own child task: when `TemporaryUSearch.view(identifier:)` returns a
/// database for that string it is searched directly; otherwise the page is loaded,
/// split into chunks, embedded, stored in a new `TemporaryUSearch` and then searched.
/// The hits of all tasks are merged, ordered by ascending distance, and the first
/// `searchCount` of them are returned.
///
/// - Parameter arguments: The query text and the URL strings to consult.
/// - Returns: A `Result` with the selected documents; empty when nothing was found.
/// - Throws: Rethrows any error raised while embedding, loading, splitting, storing or
///   searching, after reporting the failure through `reportProgress`.
func call(arguments: Arguments) async throws -> Result {
    do {
        let embedding = OpenAIEmbedding(
            configuration: UserPreferenceEmbeddingConfiguration()
        )

        let queryEmbeddings = try await embedding.embed(query: arguments.query)
        // Three hits when the `chatGPTMaxToken` preference exceeds 5000, otherwise two.
        let searchCount = UserDefaults.shared.value(for: \.chatGPTMaxToken) > 5000 ? 3 : 2

        // A URL string listed more than once would otherwise get one task per
        // occurrence: the page would be loaded and embedded repeatedly and its hits
        // would appear several times in the merged ranking. Keep first occurrences only.
        var seenURLStrings = Set<String>()
        let uniqueURLStrings = arguments.urls.filter { seenURLStrings.insert($0).inserted }

        let result = try await withThrowingTaskGroup(
            of: [(document: Document, distance: Float)].self
        ) { group in
            for urlString in uniqueURLStrings {
                // Strings that do not parse as a URL are skipped instead of failing the call.
                guard let url = URL(string: urlString) else { continue }
                group.addTask {
                    // Search an already available database for this URL string, if any.
                    if let database = await TemporaryUSearch.view(identifier: urlString) {
                        return try await database.searchWithDistance(
                            embeddings: queryEmbeddings,
                            count: searchCount
                        )
                    }
                    // 1. grab the website content
                    await reportProgress(" Loading \(url) .. ")
                    print(" == load \(url) ")
                    let loader = WebLoader(urls: [url])
                    let documents = try await loader.load()
                    await reportProgress(" Processing \(url) .. ")
                    print(" == loaded \(url) , documents: \(documents.count) ")
                    // 2. split the content
                    let splitter = RecursiveCharacterTextSplitter(
                        chunkSize: 1000,
                        chunkOverlap: 100
                    )
                    let splitDocuments = try await splitter.transformDocuments(documents)
                    print(" == split \(url) , documents: \(splitDocuments.count) ")
                    // 3. embedding and store in db
                    await reportProgress(" Embedding \(url) .. ")
                    let embeddedDocuments = try await embedding.embed(documents: splitDocuments)
                    print(" == embedded \(url) ")
                    let database = TemporaryUSearch(identifier: urlString)
                    try await database.set(embeddedDocuments)
                    print(" == save to database \(url) ")
                    // 4. search the freshly stored documents
                    let hits = try await database.searchWithDistance(
                        embeddings: queryEmbeddings,
                        count: searchCount
                    )
                    print(" == result of \(url) : \(hits) ")
                    return hits
                }
            }

            // Merge the per-URL hits, then keep the closest `searchCount` overall.
            var all = [(document: Document, distance: Float)]()
            for try await hits in group {
                all.append(contentsOf: hits)
            }
            await reportProgress(" Finish reading websites. ")
            return all
                .sorted { $0.distance < $1.distance }
                .prefix(searchCount)
        }

        return .init(relevantDocuments: result.map(\.document))
    } catch {
        // Surface the failure to the progress listener before propagating it.
        await reportProgress(" Failed reading websites. ")
        throw error
    }
}
58125}
126+
0 commit comments