@@ -194,3 +194,205 @@ rejected_msg: '{"code":-1,"msg":"Too many requests"}'
194
194
redis :
195
195
service_name : redis.static
196
196
```
197
+
198
+ ## 完整示例
199
+
200
+ AI Token 限流插件依赖 Redis 记录剩余可用的 token 数,因此首先需要部署 Redis 服务。
201
+ ```yaml
202
+ apiVersion : apps/v1
203
+ kind : Deployment
204
+ metadata :
205
+ name : redis
206
+ labels :
207
+ app : redis
208
+ spec :
209
+ replicas : 1
210
+ selector :
211
+ matchLabels :
212
+ app : redis
213
+ template :
214
+ metadata :
215
+ labels :
216
+ app : redis
217
+ spec :
218
+ containers :
219
+ - name : redis
220
+ image : redis
221
+ ports :
222
+ - containerPort : 6379
223
+ ---
224
+ apiVersion : v1
225
+ kind : Service
226
+ metadata :
227
+ name : redis
228
+ labels :
229
+ app : redis
230
+ spec :
231
+ ports :
232
+ - port : 6379
233
+ targetPort : 6379
234
+ selector :
235
+ app : redis
236
+ ---
237
+ ```
238
+
239
+ 在本例中,使用通义千问作为 AI 服务提供商。另外还需要设置 AI 统计插件,因为 AI Token 限流插件依赖 AI 统计插件计算每次请求消耗的 token 数,以下配置限制每分钟的 input 和 output token 总数为 200 个。
240
+
241
+ ``` yaml
242
+ apiVersion : extensions.higress.io/v1alpha1
243
+ kind : WasmPlugin
244
+ metadata :
245
+ name : ai-proxy
246
+ namespace : higress-system
247
+ spec :
248
+ matchRules :
249
+ - config :
250
+ provider :
251
+ type : qwen
252
+ apiTokens :
253
+ - " <YOUR_API_TOKEN>"
254
+ modelMapping :
255
+ ' gpt-3 ' : " qwen-turbo"
256
+ ' gpt-35-turbo ' : " qwen-plus"
257
+ ' gpt-4-turbo ' : " qwen-max"
258
+ ' * ' : " qwen-turbo"
259
+ ingress :
260
+ - qwen
261
+ url : oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:v1.0.0
262
+ phase : UNSPECIFIED_PHASE
263
+ priority : 100
264
+ ---
265
+ apiVersion : extensions.higress.io/v1alpha1
266
+ kind : WasmPlugin
267
+ metadata :
268
+ name : ai-statistics
269
+ namespace : higress-system
270
+ spec :
271
+ defaultConfig :
272
+ enable : true
273
+ url : oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-statistics:v1.0.0
274
+ phase : UNSPECIFIED_PHASE
275
+ priority : 200
276
+ ---
277
+ apiVersion : extensions.higress.io/v1alpha1
278
+ kind : WasmPlugin
279
+ metadata :
280
+ name : ai-token-ratelimit
281
+ namespace : higress-system
282
+ spec :
283
+ defaultConfig :
284
+ rule_name : default_limit_by_param_apikey
285
+ rule_items :
286
+ - limit_by_param : apikey
287
+ limit_keys :
288
+ - key : 123456
289
+ token_per_minute : 200
290
+ redis :
291
+ # 默认情况下,为了减轻数据面的压力,Higress 的 global.onlyPushRouteCluster 配置参数被设置为 true,意味着不会自动发现 Kubernetes Service
292
+ # 如果需要使用 Kubernetes Service 作为服务发现,可以将 global.onlyPushRouteCluster 参数设置为 false,
293
+ # 这样就可以直接将 service_name 设置为 Kubernetes Service, 而无须为 Redis 创建 McpBridge 以及 Ingress 路由
294
+ # service_name: redis.default.svc.cluster.local
295
+ service_name : redis.dns
296
+ service_port : 6379
297
+ url : oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-ratelimit:v1.0.0
298
+ phase : UNSPECIFIED_PHASE
299
+ priority : 600
300
+ ```
301
+ 注意,AI Token 限流插件中的 Redis 配置项 `service_name` 来自 McpBridge 中配置的服务来源,格式为 `<name>.<type>`(本例中为 `redis.dns`)。另外我们还需要在 McpBridge 中配置通义千问服务的访问地址。
302
+
303
+ ```yaml
304
+ apiVersion: networking.higress.io/v1
305
+ kind: McpBridge
306
+ metadata:
307
+ name: default
308
+ namespace: higress-system
309
+ spec:
310
+ registries:
311
+ - domain: dashscope.aliyuncs.com
312
+ name: qwen
313
+ port: 443
314
+ type: dns
315
+ - domain: redis.default.svc.cluster.local # Kubernetes Service
316
+ name: redis
317
+ type: dns
318
+ port: 6379
319
+ ```
320
+
321
+ 分别为通义千问服务和 Redis 服务创建两条路由规则。
322
+
323
+ ```yaml
324
+ apiVersion: networking.k8s.io/v1
325
+ kind: Ingress
326
+ metadata:
327
+ annotations:
328
+ higress.io/backend-protocol: HTTPS
329
+ higress.io/destination: qwen.dns
330
+ higress.io/proxy-ssl-name: dashscope.aliyuncs.com
331
+ higress.io/proxy-ssl-server-name: "on"
332
+ labels:
333
+ higress.io/resource-definer: higress
334
+ name: qwen
335
+ namespace: higress-system
336
+ spec:
337
+ ingressClassName: higress
338
+ rules:
339
+ - host: qwen-test.com
340
+ http:
341
+ paths:
342
+ - backend:
343
+ resource:
344
+ apiGroup: networking.higress.io
345
+ kind: McpBridge
346
+ name: default
347
+ path: /
348
+ pathType: Prefix
349
+ ---
350
+ apiVersion: networking.k8s.io/v1
351
+ kind: Ingress
352
+ metadata:
353
+ annotations:
354
+ higress.io/destination: redis.dns
355
+ higress.io/ignore-path-case: "false"
356
+ labels:
357
+ higress.io/resource-definer: higress
358
+ name: redis
359
+ spec:
360
+ ingressClassName: higress
361
+ rules:
362
+ - http:
363
+ paths:
364
+ - backend:
365
+ resource:
366
+ apiGroup: networking.higress.io
367
+ kind: McpBridge
368
+ name: default
369
+ path: /
370
+ pathType: Prefix
371
+ ```
372
+
373
+ 触发限流效果如下:
374
+
375
+ ```bash
376
+ curl "http://qwen-test.com:18000/v1/chat/completions?apikey=123456" -H "Content-Type: application/json" -d '{
377
+ "model": "gpt-3",
378
+ "messages": [
379
+ {
380
+ "role": "user",
381
+ "content": "你好,你是谁?"
382
+ }
383
+ ],
384
+ "stream": false
385
+ }'
386
+ {"id":"88cfa80f-545d-93b4-8ff3-3f5245ca33ba","choices":[{"index":0,"message":{"role":"assistant","content":"我是通义千问,由阿里云开发的AI助手。我可以回答各种问题、提供信息和与用户进行对话。有什么我可以帮助你的吗?"},"finish_reason":"stop"}],"created":1719909825,"model":"qwen-turbo","object":"chat.completion","usage":{"prompt_tokens":13,"completion_tokens":33,"total_tokens":46}}
387
+ curl "http://qwen-test.com:18000/v1/chat/completions?apikey=123456" -H "Content-Type: application/json" -d '{
388
+ "model": "gpt-3",
389
+ "messages": [
390
+ {
391
+ "role": "user",
392
+ "content": "你好,你是谁?"
393
+ }
394
+ ],
395
+ "stream": false
396
+ }'
397
+ Too many requests # 限流成功
398
+ ```
0 commit comments