11import Foundation
22
3+ /// Implementation of splitting text that looks at characters.
4+ /// Recursively tries to split by different characters to find one that works.
35public class RecursiveCharacterTextSplitter : TextSplitter {
4- /**
5- Implementation of splitting text that looks at characters.
6- Recursively tries to split by different characters to find one that works.
7- */
8- public var separators : [ String ]
96 public var chunkSize : Int
107 public var chunkOverlap : Int
118 public var lengthFunction : ( String ) -> Int
129
10+ /// A list of separators to try. They will be used in order. Supports regular expressions.
11+ public var separators : [ String ]
12+
13+ /// Create a new splitter
14+ /// - Parameters:
15+ /// - separators: A list of separators to try. They will be used in order. Supports regular
16+ /// expressions.
17+ /// - chunkSize: The maximum size of chunks. Don't use a chunk size larger than 8191, because
18+ /// length-safe embedding is not implemented.
19+ /// - chunkOverlap: The maximum overlap between chunks.
20+ /// - lengthFunction: A function to compute the length of text.
1321 public init (
1422 separators: [ String ] = [ " \n \n " , " \n " , " " , " " ] ,
1523 chunkSize: Int = 4000 ,
@@ -23,6 +31,13 @@ public class RecursiveCharacterTextSplitter: TextSplitter {
2331 self . separators = separators
2432 }
2533
34+ /// Create a new splitter
35+ /// - Parameters:
36+ /// - separatorSet: A set of separators to try.
37+ /// - chunkSize: The maximum size of chunks. Don't use a chunk size larger than 8191, because
38+ /// length-safe embedding is not implemented.
39+ /// - chunkOverlap: The maximum overlap between chunks.
40+ /// - lengthFunction: A function to compute the length of text.
2641 public init (
2742 separatorSet: TextSplitterSeparatorSet ,
2843 chunkSize: Int = 4000 ,
@@ -55,7 +70,7 @@ public class RecursiveCharacterTextSplitter: TextSplitter {
5570 }
5671 var separator : String
5772 var nextSeparators : [ String ]
58-
73+
5974 if let index = firstSeparatorIndex {
6075 separator = separators [ index]
6176 if index < separators. endIndex - 1 {
0 commit comments