@@ -56,72 +56,73 @@ struct QueryWebsiteFunction: ChatGPTFunction {
5656 }
5757
/// Answers `arguments.query` against the websites listed in `arguments.urls`.
///
/// The feature is currently switched off: the method throws `CancellationError`
/// before doing any work, so it never produces a result.
///
/// - Parameter arguments: The query and URL strings to search. They are only read
///   by the parked implementation kept in the comment below.
/// - Returns: Nothing while the feature is disabled; the method always throws.
/// - Throws: `CancellationError`, unconditionally.
func call(arguments: Arguments) async throws -> Result {
    // NOTE(review): the previous revision threw from inside a `do`/`catch`, so its
    // catch block called `reportProgress("Failed reading websites.")` before
    // rethrowing. Throwing directly skips that report; confirm nothing downstream
    // depends on it.
    throw CancellationError()

    // Parked implementation, kept for when the feature is re-enabled.
    //
    // The stale `throw CancellationError()` that used to open the `do` block has
    // been dropped from this copy; leaving it in would keep the function disabled
    // (and everything after it unreachable) once the code is uncommented.
    //
    // Outline:
    //   * embed the query once and pick how many hits to keep (3 when the
    //     `chatGPTMaxToken` preference is above 5000, otherwise 2);
    //   * per URL, in a throwing task group: search the existing
    //     `TemporaryUSearch` view for that URL if there is one, otherwise
    //     load -> split -> embed -> store the page and then search it;
    //   * merge all hits, sort by ascending distance, keep the first
    //     `searchCount`, and return their documents.
    //
    // do {
    //     let embedding = OpenAIEmbedding(
    //         configuration: UserPreferenceEmbeddingConfiguration()
    //     )
    //
    //     let queryEmbeddings = try await embedding.embed(query: arguments.query)
    //     let searchCount = UserDefaults.shared.value(for: \.chatGPTMaxToken) > 5000 ? 3 : 2
    //
    //     let result = try await withThrowingTaskGroup(
    //         of: [(document: Document, distance: Float)].self
    //     ) { group in
    //         for urlString in arguments.urls {
    //             // Strings that do not parse as URLs are skipped silently.
    //             guard let url = URL(string: urlString) else { continue }
    //             group.addTask {
    //                 // Reuse the stored index for this URL when one exists.
    //                 if let database = await TemporaryUSearch.view(identifier: urlString) {
    //                     return try await database.searchWithDistance(
    //                         embeddings: queryEmbeddings,
    //                         count: searchCount
    //                     )
    //                 }
    //                 // 1. grab the website content
    //                 await reportProgress("Loading \(url)..")
    //                 print("== load \(url)")
    //                 let loader = WebLoader(urls: [url])
    //                 let documents = try await loader.load()
    //                 await reportProgress("Processing \(url)..")
    //                 print("== loaded \(url), documents: \(documents.count)")
    //                 // 2. split the content
    //                 let splitter = RecursiveCharacterTextSplitter(
    //                     chunkSize: 1000,
    //                     chunkOverlap: 100
    //                 )
    //                 let splitDocuments = try await splitter.transformDocuments(documents)
    //                 print("== split \(url), documents: \(splitDocuments.count)")
    //                 // 3. embedding and store in db
    //                 await reportProgress("Embedding \(url)..")
    //                 let embeddedDocuments = try await embedding.embed(documents: splitDocuments)
    //                 print("== embedded \(url)")
    //                 let database = TemporaryUSearch(identifier: urlString)
    //                 try await database.set(embeddedDocuments)
    //                 print("== save to database \(url)")
    //                 let result = try await database.searchWithDistance(
    //                     embeddings: queryEmbeddings,
    //                     count: searchCount
    //                 )
    //                 print("== result of \(url): \(result)")
    //                 return result
    //             }
    //         }
    //
    //         var all = [(document: Document, distance: Float)]()
    //         for try await result in group {
    //             all.append(contentsOf: result)
    //         }
    //         await reportProgress("Finish reading websites.")
    //         return all
    //             .sorted { $0.distance < $1.distance }
    //             .prefix(searchCount)
    //     }
    //
    //     return .init(relevantDocuments: result.map(\.document))
    // } catch {
    //     await reportProgress("Failed reading websites.")
    //     throw error
    // }
}
126127}
127128
0 commit comments