Skip to content

Commit 02568e9

Browse files
committed
Add DocumentTransformer
1 parent 3631140 commit 02568e9

1 file changed

Lines changed: 49 additions & 0 deletions

File tree

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import Foundation
2+
3+
/// A type that turns one list of documents into another.
public protocol DocumentTransformer {
    /// Transforms the given documents.
    ///
    /// - Parameter documents: The documents to transform.
    /// - Returns: The documents produced by the transformation.
    /// - Throws: Whatever error the conforming type raises while transforming.
    func transformDocuments(_ documents: [Document]) async throws -> [Document]
}
6+
7+
/// A `DocumentTransformer` that works by splitting text into smaller pieces.
///
/// Conforming types only need to provide `split(text:)`; the protocol extension
/// in this file supplies a default `transformDocuments(_:)` built on top of it.
public protocol TextSplitter: DocumentTransformer {
    /// Splits `text` into multiple components.
    ///
    /// - Parameter text: The text to split.
    /// - Returns: The components the text was split into.
    /// - Throws: Whatever error the conforming type raises while splitting.
    func split(text: String) async throws -> [String]
}
11+
12+
public extension TextSplitter {
    /// Creates documents from a list of texts.
    ///
    /// Each text is passed to `split(text:)`, and every resulting chunk becomes its
    /// own `Document` carrying the metadata paired with the text it came from.
    ///
    /// - Parameters:
    ///   - texts: The texts to split.
    ///   - metadata: Metadata paired with `texts` by position. When it has fewer
    ///     entries than `texts`, the missing entries default to empty dictionaries;
    ///     entries beyond `texts.count` are ignored.
    /// - Returns: One document per chunk, in the order of `texts` and, within each
    ///   text, in the order returned by `split(text:)`.
    /// - Throws: Rethrows any error thrown by `split(text:)`.
    func createDocuments(
        texts: [String],
        metadata: [[String: Any]] = []
    ) async throws -> [Document] {
        // Clamp at zero: `Array(repeating:count:)` traps on a negative count, which
        // would otherwise happen whenever more metadata entries than texts are given.
        let paddingLength = max(0, texts.count - metadata.count)
        let paddedMetadata = metadata + [[String: Any]](repeating: [:], count: paddingLength)

        var documents = [Document]()
        // `zip` stops at the shorter sequence, so surplus metadata is simply dropped.
        for (text, textMetadata) in zip(texts, paddedMetadata) {
            let chunks = try await split(text: text)
            documents += chunks.map { Document(pageContent: $0, metadata: textMetadata) }
        }
        return documents
    }

    /// Splits documents into smaller documents.
    ///
    /// The page content of each input document is split with `split(text:)`; every
    /// resulting chunk becomes a new document that reuses the metadata of the
    /// document it was cut from.
    ///
    /// - Parameter documents: The documents to split.
    /// - Returns: The documents built from the chunks, in input order.
    /// - Throws: Rethrows any error thrown by `split(text:)`.
    func splitDocuments(_ documents: [Document]) async throws -> [Document] {
        let texts = documents.map { $0.pageContent }
        let metadata: [[String: Any]] = documents.map { $0.metadata }
        return try await createDocuments(texts: texts, metadata: metadata)
    }

    /// Transforms a sequence of documents by splitting them.
    ///
    /// This is the default `DocumentTransformer` implementation for text splitters
    /// and simply forwards to `splitDocuments(_:)`.
    ///
    /// - Parameter documents: The documents to transform.
    /// - Returns: The documents produced by `splitDocuments(_:)`.
    /// - Throws: Rethrows any error thrown by `splitDocuments(_:)`.
    func transformDocuments(_ documents: [Document]) async throws -> [Document] {
        try await splitDocuments(documents)
    }
}
47+
48+
// NOTE(review): this extension declares no members; it looks like a leftover
// placeholder — confirm whether it can be removed.
extension TextSplitter {}
49+

0 commit comments

Comments
 (0)