Skip to content

Commit 9423bc2

Browse files
committed
JVMCBC-1477 (Further) Improve config loading backoff for bad credentials
Motivation ---------- The previous change for JVMCBC-1477 increased the retry delay for bad credentials from 1 millisecond to 500 milliseconds. 500ms is probably still too short, as it produces two entries per second in the server's http_access.log. Modifications ------------- Increase the fixed "short delay" from 1ms to 10ms. Use exponential backoff (with the default .5 jitter factor) for the "long delay" exceptions like bad credentials. Implement this by chaining `retryWhen` operators. Consequently, the BucketOpenRetriedEvent is now published when the retry is _scheduled_, not when it's executed. Also, the retry delay is not available to include in the event. These minor changes are acceptable, since the event's Javadoc says it exists "for debugging reasons". Change-Id: Iee603ed6aaab105240fd8982963087bda8fd7cc7 Reviewed-on: https://review.couchbase.org/c/couchbase-jvm-clients/+/207419 Reviewed-by: Michael Reiche <[email protected]> Tested-by: Build Bot <[email protected]> Reviewed-by: David Nault <[email protected]>
1 parent 7927069 commit 9423bc2

File tree

1 file changed

+41
-23
lines changed

1 file changed

+41
-23
lines changed

core-io/src/main/java/com/couchbase/client/core/config/DefaultConfigurationProvider.java

Lines changed: 41 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474

7575
import javax.naming.NamingException;
7676
import java.time.Duration;
77+
import java.util.Arrays;
7778
import java.util.List;
7879
import java.util.Map;
7980
import java.util.Objects;
@@ -83,6 +84,7 @@
8384
import java.util.concurrent.atomic.AtomicBoolean;
8485
import java.util.concurrent.atomic.AtomicInteger;
8586
import java.util.concurrent.atomic.AtomicReference;
87+
import java.util.function.Predicate;
8688
import java.util.stream.Collectors;
8789
import java.util.stream.Stream;
8890

@@ -962,33 +964,49 @@ private Mono<ProposedBucketConfigContext> fetchBucketConfigs(final String name,
962964

963965
return loadBucketConfigForSeed(identifier, mappedKvPort, mappedManagerPort, name, alternateAddress);
964966
})
965-
.retryWhen(
966-
Retry.from(companion -> companion.flatMap(rs -> {
967-
final Throwable f = rs.failure();
968-
if (shutdown.get()) {
969-
return Mono.error(new AlreadyShutdownException());
970-
}
971-
if (f instanceof UnsupportedConfigMechanismException) {
972-
return Mono.error(Exceptions.propagate(f));
973-
}
974-
975-
boolean bucketNotFound = f instanceof BucketNotFoundDuringLoadException;
976-
boolean bucketNotReady = f instanceof BucketNotReadyDuringLoadException;
977-
boolean noAccess = f instanceof NoAccessDuringConfigLoadException;
978-
979-
// For some, wait a bit longer; retry the rest quickly.
980-
Duration delay = bucketNotFound || bucketNotReady || noAccess
981-
? Duration.ofMillis(500)
982-
: Duration.ofMillis(1);
983-
eventBus.publish(new BucketOpenRetriedEvent(name, delay, core.context(), f));
984-
return Mono
985-
.just(rs.totalRetries())
986-
.delayElement(delay, core.context().environment().scheduler());
987-
})))
967+
// Exponential backoff for certain errors.
968+
.retryWhen(Retry
969+
.backoff(Long.MAX_VALUE, Duration.ofMillis(500))
970+
.maxBackoff(Duration.ofSeconds(10))
971+
.filter(bucketConfigLoadRetryFilter(name, t -> isInstanceOfAnyOf(t,
972+
BucketNotFoundDuringLoadException.class,
973+
BucketNotReadyDuringLoadException.class,
974+
NoAccessDuringConfigLoadException.class
975+
)))
976+
)
977+
// Short fixed delay for the others we want to retry.
978+
.retryWhen(Retry
979+
.fixedDelay(Long.MAX_VALUE, Duration.ofMillis(10))
980+
.filter(bucketConfigLoadRetryFilter(name, t -> !(t instanceof UnsupportedConfigMechanismException)))
981+
)
988982
)
989983
.next();
990984
}
991985

986+
/**
987+
* Wraps a retry filter with common logic that checks for shutdown and publishes events.
988+
*/
989+
private Predicate<? super Throwable> bucketConfigLoadRetryFilter(
990+
String bucketName,
991+
Predicate<? super Throwable> errorFilter
992+
) {
993+
return t -> {
994+
if (shutdown.get()) {
995+
throw new AlreadyShutdownException();
996+
}
997+
998+
boolean retry = errorFilter.test(t);
999+
if (retry) {
1000+
eventBus.publish(new BucketOpenRetriedEvent(bucketName, Duration.ZERO, core.context(), t));
1001+
}
1002+
return retry;
1003+
};
1004+
}
1005+
1006+
private static boolean isInstanceOfAnyOf(Object o, Class<?>... candidates) {
1007+
return Arrays.stream(candidates).anyMatch(it -> it.isInstance(o));
1008+
}
1009+
9921010
private Mono<ProposedGlobalConfigContext> fetchGlobalConfigs(final Set<SeedNode> seedNodes, final boolean tls,
9931011
boolean allowStaleSeeds, boolean retryTimeouts) {
9941012
final AtomicBoolean hasErrored = new AtomicBoolean();

0 commit comments

Comments
 (0)