Skip to content

Commit 5d749ca

Browse files
committed
Add Python base RecursiveCharacterTextSplitter
1 parent 02568e9 commit 5d749ca

4 files changed

Lines changed: 84 additions & 1 deletion

File tree

Python/Sources/PythonResources/Export.swift

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,21 @@
11
import Foundation
22

3+
/// Empty marker type whose only job is to be handed to `Bundle(for:)`, so the bundle
/// containing this module can be located (used by the unit-test branch of `containingBundle`).
/// Declared `final` because it carries no behavior and is not meant to be subclassed.
final class BundleFinder {}
4+
35
let containingBundle: Bundle? = {
46
if Bundle.main.path(forResource: "site-packages", ofType: nil) != nil {
57
return Bundle.main
68
}
9+
10+
if Bundle.main.bundlePath.contains("Contents/Developer/Platforms") {
11+
// unit tests
12+
let bundle = Bundle(for: BundleFinder.self)
13+
let path = bundle.bundleURL
14+
.deletingLastPathComponent()
15+
.appendingPathComponent("CopilotForXcodeExtensionService.app").path
16+
return Bundle(path: path)
17+
}
18+
719
let path = Bundle.main.bundleURL
820
.appendingPathComponent("Contents")
921
.appendingPathComponent("Applications")
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import Foundation
2+
import PythonHelper
3+
import PythonKit
4+
5+
/// A `TextSplitter` that delegates the actual splitting to the Python class
/// `langchain.text_splitter.RecursiveCharacterTextSplitter`.
///
/// The stored values are forwarded unchanged to the Python initializer as
/// `separators`, `chunk_size` and `chunk_overlap`.
public struct RecursiveCharacterTextSplitter: TextSplitter {
    /// Thrown by `split(text:)` when the value returned by the Python splitter
    /// cannot be converted to `[String]`.
    public struct UnexpectedResultError: Error, LocalizedError {
        public var errorDescription: String? {
            "The Python RecursiveCharacterTextSplitter returned a value that is not a list of strings."
        }
    }

    /// Forwarded to the Python splitter as `separators`.
    public var separators: [String]
    /// Forwarded to the Python splitter as `chunk_size`.
    public var chunkSize: Int
    /// Forwarded to the Python splitter as `chunk_overlap`.
    public var chunkOverlap: Int

    /// Creates a splitter configuration.
    /// - Parameters:
    ///   - separators: Defaults to `["\n\n", "\n", " ", ""]`.
    ///   - chunkSize: Defaults to `4000`.
    ///   - chunkOverlap: Defaults to `200`.
    public init(
        separators: [String] = ["\n\n", "\n", " ", ""],
        chunkSize: Int = 4000,
        chunkOverlap: Int = 200
    ) {
        self.separators = separators
        self.chunkSize = chunkSize
        self.chunkOverlap = chunkOverlap
    }

    /// Splits `text` by calling `split_text` on a freshly created Python splitter.
    /// - Parameter text: The text handed to the Python `split_text` call.
    /// - Returns: The chunks returned by Python, converted to Swift strings.
    /// - Throws: Any error raised by `runPython` or by importing `langchain.text_splitter`,
    ///   and `UnexpectedResultError` when the Python result is not convertible to `[String]`.
    public func split(text: String) async throws -> [String] {
        try await runPython {
            let text_splitter = try Python.attemptImportOnPythonThread("langchain.text_splitter")
            let PythonRecursiveCharacterTextSplitter = text_splitter.RecursiveCharacterTextSplitter
            let splitter = PythonRecursiveCharacterTextSplitter(
                separators: separators,
                chunk_size: chunkSize,
                chunk_overlap: chunkOverlap
                // Disabled in the original commit.
                // NOTE(review): presumably a custom length function; confirm before enabling.
                // length_function: PythonFunction({ object in
                //     if let string = String(object) { return string.count }
                //     return 0
                // })
            )
            let result = splitter.split_text(text)
            // A failed conversion means Python returned something unexpected. Report it
            // instead of pretending the text produced no chunks.
            guard let chunks = [String](result) else {
                throw UnexpectedResultError()
            }
            return chunks
        }
    }
}
39+
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import PythonHelper
2+
import XCTest
3+
4+
@testable import LangChain
5+
6+
/// Exercises `RecursiveCharacterTextSplitter` end to end through the Python bridge.
final class RecursiveCharacterTextSplitterTests: XCTestCase {
    /// Runs the superclass set-up first, then initializes Python before each test.
    override func setUp() async throws {
        try await super.setUp()
        await initializePython()
    }

    /// Splits one paragraph with a chunk size of 100 and an overlap of 20,
    /// and checks the two chunks that come back.
    func test_split_text() async throws {
        let input = """
        Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans.
        """
        let expectedChunks = [
            "Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and",
            "of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans."
        ]

        let chunks = try await RecursiveCharacterTextSplitter(
            separators: ["\n\n", "\n", " ", ""],
            chunkSize: 100,
            chunkOverlap: 20
        ).split(text: input)

        XCTAssertEqual(chunks, expectedChunks)
    }
}
31+

Tool/Tests/OpenAIServiceTests/LimitMessagesTests.swift

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,8 @@ private func runService(
101101
))
102102
let memory = AutoManagedChatGPTMemory(
103103
systemPrompt: systemPrompt,
104-
configuration: configuration
104+
configuration: configuration,
105+
functionProvider: NoChatGPTFunctionProvider()
105106
)
106107

107108
for message in messages {

0 commit comments

Comments
 (0)