32
32
import jakarta .json .stream .JsonGenerator ;
33
33
import java .lang .Integer ;
34
34
import java .lang .String ;
35
+ import java .util .List ;
35
36
import java .util .Objects ;
36
37
import java .util .function .Function ;
37
38
import javax .annotation .Nullable ;
63
64
public class ChunkingSettings implements JsonpSerializable {
64
65
private final String strategy ;
65
66
67
+ private final String separatorGroup ;
68
+
69
+ private final List <String > separators ;
70
+
66
71
private final int maxChunkSize ;
67
72
68
73
@ Nullable
@@ -76,6 +81,8 @@ public class ChunkingSettings implements JsonpSerializable {
76
81
private ChunkingSettings (Builder builder ) {
77
82
78
83
this .strategy = ApiTypeHelper .requireNonNull (builder .strategy , this , "strategy" );
84
+ this .separatorGroup = ApiTypeHelper .requireNonNull (builder .separatorGroup , this , "separatorGroup" );
85
+ this .separators = ApiTypeHelper .unmodifiableRequired (builder .separators , this , "separators" );
79
86
this .maxChunkSize = ApiTypeHelper .requireNonNull (builder .maxChunkSize , this , "maxChunkSize" , 0 );
80
87
this .overlap = builder .overlap ;
81
88
this .sentenceOverlap = builder .sentenceOverlap ;
@@ -87,14 +94,60 @@ public static ChunkingSettings of(Function<Builder, ObjectBuilder<ChunkingSettin
87
94
}
88
95
89
96
/**
90
- * Required - The chunking strategy: <code>sentence</code> or <code>word</code>.
97
+ * Required - The chunking strategy: <code>sentence</code>, <code>word</code>,
98
+ * <code>none</code> or <code>recursive</code>.
99
+ * <ul>
100
+ * <li>If <code>strategy</code> is set to <code>recursive</code>, you must also
101
+ * specify:</li>
102
+ * </ul>
103
+ * <ul>
104
+ * <li><code>max_chunk_size</code></li>
105
+ * <li>either <code>separators</code> or <code>separator_group</code></li>
106
+ * </ul>
107
+ * <p>
108
+ * Learn more about different chunking strategies in the linked documentation.
91
109
* <p>
92
110
* API name: {@code strategy}
93
111
*/
94
112
public final String strategy () {
95
113
return this .strategy ;
96
114
}
97
115
116
+ /**
117
+ * Required - This parameter is only applicable when using the
118
+ * <code>recursive</code> chunking strategy.
119
+ * <p>
120
+ * Sets a predefined list of separators in the saved chunking settings based on
121
+ * the selected text type. Values can be <code>markdown</code> or
122
+ * <code>plaintext</code>.
123
+ * <p>
124
+ * Using this parameter is an alternative to manually specifying a custom
125
+ * <code>separators</code> list.
126
+ * <p>
127
+ * API name: {@code separator_group}
128
+ */
129
+ public final String separatorGroup () {
130
+ return this .separatorGroup ;
131
+ }
132
+
133
+ /**
134
+ * Required - A list of strings used as possible split points when chunking text
135
+ * with the <code>recursive</code> strategy.
136
+ * <p>
137
+ * Each string can be a plain string or a regular expression (regex) pattern.
138
+ * The system tries each separator in order to split the text, starting from the
139
+ * first item in the list.
140
+ * <p>
141
+ * After splitting, it attempts to recombine smaller pieces into larger chunks
142
+ * that stay within the <code>max_chunk_size</code> limit, to reduce the total
143
+ * number of chunks generated.
144
+ * <p>
145
+ * API name: {@code separators}
146
+ */
147
+ public final List <String > separators () {
148
+ return this .separators ;
149
+ }
150
+
98
151
/**
99
152
* Required - The maximum size of a chunk in words. This value cannot be higher
100
153
* than <code>300</code> or lower than <code>20</code> (for
@@ -145,6 +198,19 @@ protected void serializeInternal(JsonGenerator generator, JsonpMapper mapper) {
145
198
generator .writeKey ("strategy" );
146
199
generator .write (this .strategy );
147
200
201
+ generator .writeKey ("separator_group" );
202
+ generator .write (this .separatorGroup );
203
+
204
+ if (ApiTypeHelper .isDefined (this .separators )) {
205
+ generator .writeKey ("separators" );
206
+ generator .writeStartArray ();
207
+ for (String item0 : this .separators ) {
208
+ generator .write (item0 );
209
+
210
+ }
211
+ generator .writeEnd ();
212
+
213
+ }
148
214
generator .writeKey ("max_chunk_size" );
149
215
generator .write (this .maxChunkSize );
150
216
@@ -175,6 +241,10 @@ public String toString() {
175
241
public static class Builder extends WithJsonObjectBuilderBase <Builder > implements ObjectBuilder <ChunkingSettings > {
176
242
private String strategy ;
177
243
244
+ private String separatorGroup ;
245
+
246
+ private List <String > separators ;
247
+
178
248
private Integer maxChunkSize ;
179
249
180
250
@ Nullable
@@ -184,7 +254,18 @@ public static class Builder extends WithJsonObjectBuilderBase<Builder> implement
184
254
private Integer sentenceOverlap ;
185
255
186
256
/**
187
- * Required - The chunking strategy: <code>sentence</code> or <code>word</code>.
257
+ * Required - The chunking strategy: <code>sentence</code>, <code>word</code>,
258
+ * <code>none</code> or <code>recursive</code>.
259
+ * <ul>
260
+ * <li>If <code>strategy</code> is set to <code>recursive</code>, you must also
261
+ * specify:</li>
262
+ * </ul>
263
+ * <ul>
264
+ * <li><code>max_chunk_size</code></li>
265
+ * <li>either <code>separators</code> or <code>separator_group</code></li>
266
+ * </ul>
267
+ * <p>
268
+ * Learn more about different chunking strategies in the linked documentation.
188
269
* <p>
189
270
* API name: {@code strategy}
190
271
*/
@@ -193,6 +274,66 @@ public final Builder strategy(String value) {
193
274
return this ;
194
275
}
195
276
277
+ /**
278
+ * Required - This parameter is only applicable when using the
279
+ * <code>recursive</code> chunking strategy.
280
+ * <p>
281
+ * Sets a predefined list of separators in the saved chunking settings based on
282
+ * the selected text type. Values can be <code>markdown</code> or
283
+ * <code>plaintext</code>.
284
+ * <p>
285
+ * Using this parameter is an alternative to manually specifying a custom
286
+ * <code>separators</code> list.
287
+ * <p>
288
+ * API name: {@code separator_group}
289
+ */
290
+ public final Builder separatorGroup (String value ) {
291
+ this .separatorGroup = value ;
292
+ return this ;
293
+ }
294
+
295
+ /**
296
+ * Required - A list of strings used as possible split points when chunking text
297
+ * with the <code>recursive</code> strategy.
298
+ * <p>
299
+ * Each string can be a plain string or a regular expression (regex) pattern.
300
+ * The system tries each separator in order to split the text, starting from the
301
+ * first item in the list.
302
+ * <p>
303
+ * After splitting, it attempts to recombine smaller pieces into larger chunks
304
+ * that stay within the <code>max_chunk_size</code> limit, to reduce the total
305
+ * number of chunks generated.
306
+ * <p>
307
+ * API name: {@code separators}
308
+ * <p>
309
+ * Adds all elements of <code>list</code> to <code>separators</code>.
310
+ */
311
+ public final Builder separators (List <String > list ) {
312
+ this .separators = _listAddAll (this .separators , list );
313
+ return this ;
314
+ }
315
+
316
+ /**
317
+ * Required - A list of strings used as possible split points when chunking text
318
+ * with the <code>recursive</code> strategy.
319
+ * <p>
320
+ * Each string can be a plain string or a regular expression (regex) pattern.
321
+ * The system tries each separator in order to split the text, starting from the
322
+ * first item in the list.
323
+ * <p>
324
+ * After splitting, it attempts to recombine smaller pieces into larger chunks
325
+ * that stay within the <code>max_chunk_size</code> limit, to reduce the total
326
+ * number of chunks generated.
327
+ * <p>
328
+ * API name: {@code separators}
329
+ * <p>
330
+ * Adds one or more values to <code>separators</code>.
331
+ */
332
+ public final Builder separators (String value , String ... values ) {
333
+ this .separators = _listAdd (this .separators , value , values );
334
+ return this ;
335
+ }
336
+
196
337
/**
197
338
* Required - The maximum size of a chunk in words. This value cannot be higher
198
339
* than <code>300</code> or lower than <code>20</code> (for
@@ -259,6 +400,9 @@ public ChunkingSettings build() {
259
400
protected static void setupChunkingSettingsDeserializer (ObjectDeserializer <ChunkingSettings .Builder > op ) {
260
401
261
402
op .add (Builder ::strategy , JsonpDeserializer .stringDeserializer (), "strategy" );
403
+ op .add (Builder ::separatorGroup , JsonpDeserializer .stringDeserializer (), "separator_group" );
404
+ op .add (Builder ::separators , JsonpDeserializer .arrayDeserializer (JsonpDeserializer .stringDeserializer ()),
405
+ "separators" );
262
406
op .add (Builder ::maxChunkSize , JsonpDeserializer .integerDeserializer (), "max_chunk_size" );
263
407
op .add (Builder ::overlap , JsonpDeserializer .integerDeserializer (), "overlap" );
264
408
op .add (Builder ::sentenceOverlap , JsonpDeserializer .integerDeserializer (), "sentence_overlap" );
0 commit comments