deepjavalibrary · siddvenk · Jun 27, 2024 · Jun 27, 2024
@@ -26,6 +26,7 @@
 import ai.djl.serving.util.ConfigManager;
 import ai.djl.serving.util.NettyUtils;
 import ai.djl.serving.wlm.ModelInfo;
+import ai.djl.serving.wlm.util.WlmCapacityException;
 import ai.djl.serving.wlm.util.WlmException;
 import ai.djl.serving.workflow.Workflow;
 import ai.djl.translate.TranslateException;
@@ -468,28 +469,33 @@ void sendOutput(Output output, ChannelHandlerContext ctx) {
     }
 
     void onException(Throwable t, ChannelHandlerContext ctx) {
-        HttpResponseStatus status;
+        int code;
         if (t instanceof TranslateException || t instanceof BadRequestException) {
             logger.debug(t.getMessage(), t);
             SERVER_METRIC.info("{}", RESPONSE_4_XX);
-            status = HttpResponseStatus.BAD_REQUEST;
+            code = config.getBadRequestErrorHttpCode();
         } else if (t instanceof WlmException) {
             logger.warn(t.getMessage(), t);
             SERVER_METRIC.info("{}", RESPONSE_5_XX);
             SERVER_METRIC.info("{}", WLM_ERROR);
-            status = HttpResponseStatus.SERVICE_UNAVAILABLE;
+            if (t instanceof WlmCapacityException) {
+                code = config.getThrottleErrorHttpCode();
+            } else {
+                code = config.getWlmErrorHttpCode();
+            }
             if (!exceedErrorRate && config.onWlmError()) {
                 exceedErrorRate = true;
             }
         } else {
             logger.warn("Unexpected error", t);
             SERVER_METRIC.info("{}", RESPONSE_5_XX);
             SERVER_METRIC.info("{}", SERVER_ERROR);
-            status = HttpResponseStatus.INTERNAL_SERVER_ERROR;
+            code = config.getServerErrorHttpCode();
             if (!exceedErrorRate && config.onServerError()) {
                 exceedErrorRate = true;
             }
         }
+        HttpResponseStatus status = HttpResponseStatus.valueOf(code);
 
         /*
          * We can load the models based on the configuration file.Since this Job is

@@ -84,6 +84,11 @@ public final class ConfigManager {
     private static final String ERROR_RATE_SERVER = "error_rate_server";
     private static final String ERROR_RATE_MODEL = "error_rate_model";
     private static final String ERROR_RATE_ANY = "error_rate_any";
+    private static final String BAD_REQUEST_ERROR_HTTP_CODE = "bad_request_http_code";
+    private static final String WLM_ERROR_HTTP_CODE = "wlm_error_http_code";
+    private static final String THROTTLE_ERROR_HTTP_CODE = "throttle_error_http_code";
+    private static final String TIMEOUT_ERROR_HTTP_CODE = "timeout_http_code";
+    private static final String SERVER_ERROR_HTTP_CODE = "server_error_http_code";
 
     // Configuration which are not documented or enabled through environment variables
     private static final String USE_NATIVE_IO = "use_native_io";
@@ -443,6 +448,51 @@ public int getChunkedReadTimeout() {
         return getIntProperty(CHUNKED_READ_TIMEOUT, 60);
     }
 
+    /**
+     * Returns the HTTP response status code to send for bad request errors.
+     *
+     * @return the configured {@code bad_request_http_code} value, presumably 400 if unset
+     */
+    public int getBadRequestErrorHttpCode() {
+        return getIntProperty(BAD_REQUEST_ERROR_HTTP_CODE, 400);
+    }
+
+    /**
+     * Returns the HTTP response status code to send for WorkLoadManager errors.
+     *
+     * @return the configured {@code wlm_error_http_code} value, presumably 503 if unset
+     */
+    public int getWlmErrorHttpCode() {
+        return getIntProperty(WLM_ERROR_HTTP_CODE, 503);
+    }
+
+    /**
+     * Returns the HTTP response status code to send for throttling errors.
+     *
+     * @return the configured {@code throttle_error_http_code} value, presumably 503 if unset
+     */
+    public int getThrottleErrorHttpCode() {
+        return getIntProperty(THROTTLE_ERROR_HTTP_CODE, 503);
+    }
+
+    /**
+     * Returns the HTTP response status code to send for request timeout errors.
+     *
+     * @return the configured {@code timeout_http_code} value, presumably 400 if unset
+     */
+    public int getTimeoutErrorHttpCode() {
+        return getIntProperty(TIMEOUT_ERROR_HTTP_CODE, 400);
+    }
+
+    /**
+     * Returns the HTTP response status code to send for generic server errors.
+     *
+     * @return the configured {@code server_error_http_code} value, presumably 500 if unset
+     */
+    public int getServerErrorHttpCode() {
+        return getIntProperty(SERVER_ERROR_HTTP_CODE, 500);
+    }
+
     /**
      * Returns the value with the specified key in this configuration.
      *

@@ -954,8 +954,9 @@ private void testThrottle() throws InterruptedException {
         if (CudaUtils.getGpuCount() <= 1) {
             // one request is not able to saturate workers in multi-GPU case
             // one of the request will be throttled
-            if ((httpStatus.code() != 503 || httpStatus2.code() != 200)
-                    && (httpStatus2.code() != 503 || httpStatus.code() != 200)) {
+            int throttleCode = configManager.getThrottleErrorHttpCode();
+            if ((httpStatus.code() != throttleCode || httpStatus2.code() != 200)
+                    && (httpStatus2.code() != throttleCode || httpStatus.code() != 200)) {
                 logger.info("request 1 code: {}, request 2 code: {}", httpStatus, httpStatus2);
                 Assert.fail("Expected one of the request be throttled.");
             }
@@ -1357,7 +1358,7 @@ private void testRegisterModelMissingUrl() throws InterruptedException {
 
         if (!System.getProperty("os.name").startsWith("Win")) {
             ErrorResponse resp = JsonUtils.GSON.fromJson(result, ErrorResponse.class);
-            assertEquals(resp.getCode(), HttpResponseStatus.BAD_REQUEST.code());
+            assertEquals(resp.getCode(), configManager.getBadRequestErrorHttpCode());
             assertEquals(resp.getMessage(), "Parameter url is required.");
         }
     }
@@ -1453,7 +1454,7 @@ private void testServiceUnavailable() throws InterruptedException {
 
         if (!System.getProperty("os.name").startsWith("Win")) {
             ErrorResponse resp = JsonUtils.GSON.fromJson(result, ErrorResponse.class);
-            assertEquals(resp.getCode(), HttpResponseStatus.SERVICE_UNAVAILABLE.code());
+            assertEquals(resp.getCode(), configManager.getWlmErrorHttpCode());
             assertEquals(resp.getMessage(), "All model workers has been shutdown: mlp_2");
         }