@@ -7,7 +7,7 @@ final class TextSplitterTests: XCTestCase {
77 var chunkSize : Int
88 var chunkOverlap : Int
99 var lengthFunction : ( String ) -> Int = { $0. count }
10- func split( text: String ) async throws -> [ String ] {
10+ func split( text: String ) async throws -> [ TextChunk ] {
1111 [ ]
1212 }
1313 }
@@ -25,7 +25,15 @@ final class TextSplitterTests: XCTestCase {
2525
2626 XCTAssertEqual (
2727 result,
28- [ " Madam " , " Speaker, " , " Madam " , " Vice " , " President, " , " our " , " First " ]
28+ [
29+ . init( text: " Madam " , startUTF16Offset: 0 , endUTF16Offset: 5 ) ,
30+ . init( text: " Speaker, " , startUTF16Offset: 5 , endUTF16Offset: 14 ) ,
31+ . init( text: " Madam " , startUTF16Offset: 14 , endUTF16Offset: 20 ) ,
32+ . init( text: " Vice " , startUTF16Offset: 20 , endUTF16Offset: 25 ) ,
33+ . init( text: " President, " , startUTF16Offset: 25 , endUTF16Offset: 36 ) ,
34+ . init( text: " our " , startUTF16Offset: 36 , endUTF16Offset: 40 ) ,
35+ . init( text: " First " , startUTF16Offset: 40 , endUTF16Offset: 46 ) ,
36+ ]
2937 )
3038 }
3139
@@ -42,7 +50,10 @@ final class TextSplitterTests: XCTestCase {
4250
4351 XCTAssertEqual (
4452 result,
45- [ " Madam Speaker, Madam " , " Vice President, our First " ]
53+ [
54+ . init( text: " Madam Speaker, Madam " , startUTF16Offset: 0 , endUTF16Offset: 20 ) ,
55+ . init( text: " Vice President, our First " , startUTF16Offset: 20 , endUTF16Offset: 46 ) ,
56+ ]
4657 )
4758 }
4859
@@ -53,14 +64,27 @@ final class TextSplitterTests: XCTestCase {
5364 )
5465
5566 let result = splitter. mergeSplits (
56- [ " Madam " , " Speaker, " , " Madam " , " Vice " , " President, " , " our " , " First " ]
67+ [
68+ . init( text: " Madam " , startUTF16Offset: 0 , endUTF16Offset: 5 ) ,
69+ . init( text: " Speaker, " , startUTF16Offset: 5 , endUTF16Offset: 14 ) ,
70+ . init( text: " Madam " , startUTF16Offset: 14 , endUTF16Offset: 20 ) ,
71+ . init( text: " Vice " , startUTF16Offset: 20 , endUTF16Offset: 25 ) ,
72+ . init( text: " President, " , startUTF16Offset: 25 , endUTF16Offset: 36 ) ,
73+ . init( text: " our " , startUTF16Offset: 36 , endUTF16Offset: 40 ) ,
74+ . init( text: " First " , startUTF16Offset: 40 , endUTF16Offset: 46 ) ,
75+ ]
5776 )
5877
5978 XCTAssertEqual (
6079 result,
61- [ " Madam Speaker, " , " Madam Vice " , " President, our " , " our First " ]
80+ [
81+ . init( text: " Madam Speaker, " , startUTF16Offset: 0 , endUTF16Offset: 14 ) ,
82+ . init( text: " Madam Vice " , startUTF16Offset: 14 , endUTF16Offset: 25 ) ,
83+ . init( text: " President, our " , startUTF16Offset: 25 , endUTF16Offset: 40 ) ,
84+ . init( text: " our First " , startUTF16Offset: 36 , endUTF16Offset: 46 ) ,
85+ ]
6286 )
63- XCTAssertTrue ( result. allSatisfy { $0. count <= 15 } )
87+ XCTAssertTrue ( result. allSatisfy { $0. text . count <= 15 } )
6488 }
6589}
6690
0 commit comments