@@ -56,73 +56,71 @@ struct QueryWebsiteFunction: ChatGPTFunction {
5656 }
5757
/// Searches the content of `arguments.urls` for the documents closest to `arguments.query`.
///
/// The query is embedded once. Each URL string that parses as a `URL` is then handled in
/// its own child task:
/// - if `TemporaryUSearch.view(identifier:)` returns a database for the URL string, that
///   database is searched directly;
/// - otherwise the page is loaded, split into chunks, embedded, stored in a new
///   `TemporaryUSearch`, and then searched.
///
/// The per-URL hits are merged, sorted by ascending distance, and truncated to the
/// search count before being returned. Progress is reported along the way through
/// `reportProgress`.
///
/// - Parameter arguments: Supplies the `query` text and the `urls` to read.
/// - Returns: A result built from the documents of the closest hits.
/// - Throws: Rethrows any error raised while embedding, loading, splitting, storing, or
///   searching, after reporting "Failed reading websites.".
func call(arguments: Arguments) async throws -> Result {
    do {
        let embedding = OpenAIEmbedding(
            configuration: UserPreferenceEmbeddingConfiguration()
        )

        let queryEmbeddings = try await embedding.embed(query: arguments.query)
        // A larger token budget gets 3 documents, a smaller one gets 2. The count must
        // not grow as the budget shrinks (presumably so the hits still fit in the prompt
        // - confirm against the caller).
        let searchCount = UserDefaults.shared.value(for: \.chatGPTMaxToken) > 5000 ? 3 : 2

        let result = try await withThrowingTaskGroup(
            of: [(document: Document, distance: Float)].self
        ) { group in
            for urlString in arguments.urls {
                // Strings that do not parse as a URL are skipped silently.
                guard let url = URL(string: urlString) else { continue }
                group.addTask {
                    // Reuse an existing database for this URL string when one is available.
                    if let database = await TemporaryUSearch.view(identifier: urlString) {
                        return try await database.searchWithDistance(
                            embeddings: queryEmbeddings,
                            count: searchCount
                        )
                    }
                    // 1. grab the website content
                    await reportProgress("Loading \(url)..")
                    print("== load \(url)")
                    let loader = WebLoader(urls: [url])
                    let documents = try await loader.load()
                    await reportProgress("Processing \(url)..")
                    print("== loaded \(url), documents: \(documents.count)")
                    // 2. split the content
                    let splitter = RecursiveCharacterTextSplitter(
                        chunkSize: 1000,
                        chunkOverlap: 100
                    )
                    let splitDocuments = try await splitter.transformDocuments(documents)
                    print("== split \(url), documents: \(splitDocuments.count)")
                    // 3. embedding and store in db
                    await reportProgress("Embedding \(url)..")
                    let embeddedDocuments = try await embedding.embed(documents: splitDocuments)
                    print("== embedded \(url)")
                    let database = TemporaryUSearch(identifier: urlString)
                    try await database.set(embeddedDocuments)
                    print("== save to database \(url)")
                    let result = try await database.searchWithDistance(
                        embeddings: queryEmbeddings,
                        count: searchCount
                    )
                    print("== result of \(url): \(result)")
                    return result
                }
            }

            // Merge the hits from every URL, then keep only the closest `searchCount`.
            var all = [(document: Document, distance: Float)]()
            for try await result in group {
                all.append(contentsOf: result)
            }
            await reportProgress("Finish reading websites.")
            return all
                .sorted { $0.distance < $1.distance }
                .prefix(searchCount)
        }

        return .init(relevantDocuments: result.map(\.document))
    } catch {
        // Surface the failure to the user before propagating it to the caller.
        await reportProgress("Failed reading websites.")
        throw error
    }
}
127125}
128126
0 commit comments