Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import ai.djl.serving.util.ConfigManager;
import ai.djl.serving.util.NettyUtils;
import ai.djl.serving.wlm.ModelInfo;
import ai.djl.serving.wlm.util.WlmCapacityException;
import ai.djl.serving.wlm.util.WlmException;
import ai.djl.serving.workflow.Workflow;
import ai.djl.translate.TranslateException;
Expand Down Expand Up @@ -468,28 +469,33 @@ void sendOutput(Output output, ChannelHandlerContext ctx) {
}

void onException(Throwable t, ChannelHandlerContext ctx) {
HttpResponseStatus status;
int code;
if (t instanceof TranslateException || t instanceof BadRequestException) {
logger.debug(t.getMessage(), t);
SERVER_METRIC.info("{}", RESPONSE_4_XX);
status = HttpResponseStatus.BAD_REQUEST;
code = config.getBadRequestErrorHttpCode();
} else if (t instanceof WlmException) {
logger.warn(t.getMessage(), t);
SERVER_METRIC.info("{}", RESPONSE_5_XX);
SERVER_METRIC.info("{}", WLM_ERROR);
status = HttpResponseStatus.SERVICE_UNAVAILABLE;
if (t instanceof WlmCapacityException) {
code = config.getThrottleErrorHttpCode();
} else {
code = config.getWlmErrorHttpCode();
}
if (!exceedErrorRate && config.onWlmError()) {
exceedErrorRate = true;
}
} else {
logger.warn("Unexpected error", t);
SERVER_METRIC.info("{}", RESPONSE_5_XX);
SERVER_METRIC.info("{}", SERVER_ERROR);
status = HttpResponseStatus.INTERNAL_SERVER_ERROR;
code = config.getServerErrorHttpCode();
if (!exceedErrorRate && config.onServerError()) {
exceedErrorRate = true;
}
}
HttpResponseStatus status = HttpResponseStatus.valueOf(code);

/*
* We can load the models based on the configuration file. Since this Job is
Expand Down
50 changes: 50 additions & 0 deletions serving/src/main/java/ai/djl/serving/util/ConfigManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,11 @@ public final class ConfigManager {
private static final String ERROR_RATE_SERVER = "error_rate_server";
private static final String ERROR_RATE_MODEL = "error_rate_model";
private static final String ERROR_RATE_ANY = "error_rate_any";
private static final String BAD_REQUEST_ERROR_HTTP_CODE = "bad_request_http_code";
private static final String WLM_ERROR_HTTP_CODE = "wlm_error_http_code";
private static final String THROTTLE_ERROR_HTTP_CODE = "throttle_error_http_code";
private static final String TIMEOUT_ERROR_HTTP_CODE = "timeout_http_code";
private static final String SERVER_ERROR_HTTP_CODE = "server_error_http_code";

// Configurations which are not documented or enabled through environment variables
private static final String USE_NATIVE_IO = "use_native_io";
Expand Down Expand Up @@ -443,6 +448,51 @@ public int getChunkedReadTimeout() {
return getIntProperty(CHUNKED_READ_TIMEOUT, 60);
}

/**
* Returns the http response status code to use for bad request errors.
*
* @return the http response status code to use for bad request errors
*/
public int getBadRequestErrorHttpCode() {
    // 400 is handed to getIntProperty as its second argument
    // (presumably the value used when the key is not set; verify against getIntProperty).
    final int fallbackCode = 400;
    return getIntProperty(BAD_REQUEST_ERROR_HTTP_CODE, fallbackCode);
}

/**
* Returns the http response status code to use for WorkLoadManager errors.
*
* @return the http response status code to use for WorkLoadManager errors
*/
public int getWlmErrorHttpCode() {
    // 503 is handed to getIntProperty as its second argument
    // (presumably the value used when the key is not set; verify against getIntProperty).
    final int fallbackCode = 503;
    return getIntProperty(WLM_ERROR_HTTP_CODE, fallbackCode);
}

/**
* Returns the http response status code to use for throttling errors.
*
* @return the http response status code to use for throttling errors
*/
public int getThrottleErrorHttpCode() {
    // 503 is handed to getIntProperty as its second argument
    // (presumably the value used when the key is not set; verify against getIntProperty).
    final int fallbackCode = 503;
    return getIntProperty(THROTTLE_ERROR_HTTP_CODE, fallbackCode);
}

/**
* Returns the http response status code to use for Request Timeout errors.
*
* @return the http response status code to use for Request Timeout errors
*/
public int getTimeoutErrorHttpCode() {
    // 400 is handed to getIntProperty as its second argument
    // (presumably the value used when the key is not set; verify against getIntProperty).
    // NOTE(review): 400 rather than 408 (Request Timeout) -- confirm this default is intentional.
    final int fallbackCode = 400;
    return getIntProperty(TIMEOUT_ERROR_HTTP_CODE, fallbackCode);
}

/**
* Returns the http response status code to use for generic Server errors.
*
* @return the http response status code to use for generic Server errors
*/
public int getServerErrorHttpCode() {
    // 500 is handed to getIntProperty as its second argument
    // (presumably the value used when the key is not set; verify against getIntProperty).
    final int fallbackCode = 500;
    return getIntProperty(SERVER_ERROR_HTTP_CODE, fallbackCode);
}

/**
* Returns the value with the specified key in this configuration.
*
Expand Down
9 changes: 5 additions & 4 deletions serving/src/test/java/ai/djl/serving/ModelServerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -954,8 +954,9 @@ private void testThrottle() throws InterruptedException {
if (CudaUtils.getGpuCount() <= 1) {
// one request is not able to saturate workers in multi-GPU case
// one of the request will be throttled
if ((httpStatus.code() != 503 || httpStatus2.code() != 200)
&& (httpStatus2.code() != 503 || httpStatus.code() != 200)) {
int throttleCode = configManager.getThrottleErrorHttpCode();
if ((httpStatus.code() != throttleCode || httpStatus2.code() != 200)
&& (httpStatus2.code() != throttleCode || httpStatus.code() != 200)) {
logger.info("request 1 code: {}, request 2 code: {}", httpStatus, httpStatus2);
Assert.fail("Expected one of the request be throttled.");
}
Expand Down Expand Up @@ -1357,7 +1358,7 @@ private void testRegisterModelMissingUrl() throws InterruptedException {

if (!System.getProperty("os.name").startsWith("Win")) {
ErrorResponse resp = JsonUtils.GSON.fromJson(result, ErrorResponse.class);
assertEquals(resp.getCode(), HttpResponseStatus.BAD_REQUEST.code());
assertEquals(resp.getCode(), configManager.getBadRequestErrorHttpCode());
assertEquals(resp.getMessage(), "Parameter url is required.");
}
}
Expand Down Expand Up @@ -1453,7 +1454,7 @@ private void testServiceUnavailable() throws InterruptedException {

if (!System.getProperty("os.name").startsWith("Win")) {
ErrorResponse resp = JsonUtils.GSON.fromJson(result, ErrorResponse.class);
assertEquals(resp.getCode(), HttpResponseStatus.SERVICE_UNAVAILABLE.code());
assertEquals(resp.getCode(), configManager.getWlmErrorHttpCode());
assertEquals(resp.getMessage(), "All model workers has been shutdown: mlp_2");
}

Expand Down