Skip to content

Commit c0ca28f

Browse files
committed
(@bug W-3315975@) Provide an option to delay auto recovery of lost bookies for planned downtimes or intermittent
Fixing a bug in the earlier implementation where, on a rolling upgrade, we were not honoring 'lostBookieRecoveryDelay'. I.e. if the first bookie goes down, we honor 'lostBookieRecoveryDelay'. But if that bookie comes up and a different bookie goes down we would treat it like multiple bookie failure and start the audit. This change fixes this problem such that when the second bookie goes down it realizes that there is only one bookie down and hence doesn't treat it like a multiple bookie failure scenario.
1 parent b26e5f6 commit c0ca28f

File tree

2 files changed

+131
-32
lines changed

2 files changed

+131
-32
lines changed

bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java

Lines changed: 48 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
package org.apache.bookkeeper.replication;
2222

2323
import com.google.common.base.Stopwatch;
24+
import com.google.common.collect.Lists;
2425
import com.google.common.collect.Sets;
2526

2627
import org.apache.bookkeeper.client.BKException;
@@ -104,6 +105,7 @@ public class Auditor implements BookiesListener {
104105
private final long auditorPeriodicCheckInterval;
105106
private final long bookieCheckInterval;
106107
private volatile boolean isBookieCheckFirstTime;
108+
private Set<String> bookiesToBeAudited = Sets.newHashSet();
107109

108110
public Auditor(final String bookieIdentifier, ServerConfiguration conf,
109111
ZooKeeper zkc, StatsLogger statsLogger) throws UnavailableException {
@@ -195,46 +197,61 @@ public void run() {
195197
waitIfLedgerReplicationDisabled();
196198

197199
List<String> availableBookies = getAvailableBookies();
198-
199200
// casting to String, as knownBookies and availableBookies
200201
// contains only String values
201202
// find new bookies(if any) and update the known bookie list
202203
Collection<String> newBookies = CollectionUtils.subtract(
203204
availableBookies, knownBookies);
204205
knownBookies.addAll(newBookies);
206+
if (!bookiesToBeAudited.isEmpty() && knownBookies.containsAll(bookiesToBeAudited)) {
207+
// the bookie, which went down earlier and had an audit scheduled for,
208+
// has come up. So let us stop tracking it and cancel the audit. Since
209+
// we allow delaying of audit when there is only one failed bookie,
210+
// bookiesToBeAudited should just have 1 element and hence containsAll
211+
// check should be ok
212+
if (auditTask != null && auditTask.cancel(false)) {
213+
auditTask = null;
214+
numDelayedBookieAuditsCancelled.inc();
215+
}
216+
bookiesToBeAudited.clear();
217+
}
205218

206219
// find lost bookies(if any)
207-
Collection<String> lostBookies = CollectionUtils.subtract(
208-
knownBookies, availableBookies);
209-
210-
if (lostBookies.size() > 0) {
211-
knownBookies.removeAll(lostBookies);
212-
if (conf.getLostBookieRecoveryDelay() == 0 ||
213-
lostBookies.size() > 1 ||
214-
auditTask != null) {
215-
// 1) if more than one bookie is down, start the audit immediately;
216-
// 2) if we had scheduled an audit earlier for a lost bookie and in
217-
// the meantime another bookie goes down, let us not delay recovery
218-
// we cancel the previously scheduled audit before starting the new
219-
// one
220-
if (auditTask != null) {
221-
if (auditTask.cancel(false)) {
222-
auditTask = null;
223-
numDelayedBookieAuditsCancelled.inc();
224-
}
225-
}
226-
startAudit(false);
227-
} else {
228-
auditTask = executor.schedule( new Runnable() {
229-
public void run() {
230-
startAudit(false);
231-
auditTask = null;
232-
}
233-
}, conf.getLostBookieRecoveryDelay(), TimeUnit.SECONDS);
234-
numBookieAuditsDelayed.inc();
235-
LOG.info("Delaying the start of bookie audit by " + conf.getLostBookieRecoveryDelay() +
236-
" seconds");
220+
bookiesToBeAudited.addAll(CollectionUtils.subtract(knownBookies, availableBookies));
221+
if (bookiesToBeAudited.size() == 0) {
222+
return;
223+
}
224+
knownBookies.removeAll(bookiesToBeAudited);
225+
if (conf.getLostBookieRecoveryDelay() == 0) {
226+
startAudit(false);
227+
bookiesToBeAudited.clear();
228+
return;
229+
}
230+
if (bookiesToBeAudited.size() > 1) {
231+
// if more than one bookie is down, start the audit immediately;
232+
LOG.info("Multiple bookie failure; not delaying bookie audit. Bookies lost now: "
233+
+ CollectionUtils.subtract(knownBookies, availableBookies)
234+
+"; All lost bookies: " + bookiesToBeAudited.toString());
235+
if (auditTask != null && auditTask.cancel(false)) {
236+
auditTask = null;
237+
numDelayedBookieAuditsCancelled.inc();
237238
}
239+
startAudit(false);
240+
bookiesToBeAudited.clear();
241+
return;
242+
}
243+
if (auditTask == null) {
244+
// if there is no scheduled audit, schedule one
245+
auditTask = executor.schedule( new Runnable() {
246+
public void run() {
247+
startAudit(false);
248+
auditTask = null;
249+
bookiesToBeAudited.clear();
250+
}
251+
}, conf.getLostBookieRecoveryDelay(), TimeUnit.SECONDS);
252+
numBookieAuditsDelayed.inc();
253+
LOG.info("Delaying bookie audit by " + conf.getLostBookieRecoveryDelay()
254+
+ "secs for " + bookiesToBeAudited.toString());
238255
}
239256
} catch (BKException bke) {
240257
LOG.error("Exception getting bookie list", bke);

bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorLedgerCheckerTest.java

Lines changed: 83 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -436,7 +436,7 @@ public void testDelayedAuditWithMultipleBookieFailures() throws Exception {
436436
// 2 second grace period for the ledgers to get reported as under replicated
437437
Thread.sleep(2000);
438438

439-
// If the following checks pass, it means that auditing happened
439+
// If the following checks pass, it means that audit happened
440440
// within 2 seconds of second bookie going down and it didn't
441441
// wait for 7 more seconds. Hence the second bookie failure doesn't
442442
// delay the audit
@@ -449,6 +449,70 @@ public void testDelayedAuditWithMultipleBookieFailures() throws Exception {
449449
data.contains(shutdownBookie1) && data.contains(shutdownBookie2));
450450
}
451451

452+
/**
453+
* Test audit of bookies is delayed during rolling upgrade scenario:
454+
* a bookies goes down and comes up, the next bookie go down and up and so on.
455+
* At any time only one bookie is down.
456+
*/
457+
@Test(timeout=60000)
458+
public void testDelayedAuditWithRollingUpgrade() throws Exception {
459+
// wait for the periodic bookie check to finish
460+
Thread.sleep(1000);
461+
462+
// create a ledger with a bunch of entries
463+
LedgerHandle lh1 = createAndAddEntriesToLedger();
464+
Long ledgerId = lh1.getId();
465+
LOG.debug("Created ledger : " + ledgerId);
466+
ledgerList.add(ledgerId);
467+
lh1.close();
468+
469+
CountDownLatch underReplicaLatch = registerUrLedgerWatcher(ledgerList.size());
470+
471+
// wait for 5 seconds before starting the recovery work when a bookie fails
472+
baseConf.setLostBookieRecoveryDelay(5);
473+
474+
// shutdown a non auditor bookie to avoid an election
475+
int idx1 = getShutDownNonAuditorBookieIdx("");
476+
ServerConfiguration conf1 = bsConfs.get(idx1);
477+
String shutdownBookie1 = shutdownBookie(idx1);
478+
479+
// wait for 2 seconds and there shouldn't be any under replicated ledgers
480+
// because we have delayed the start of audit by 5 seconds
481+
assertFalse("audit of lost bookie isn't delayed", underReplicaLatch.await(2, TimeUnit.SECONDS));
482+
assertEquals("under replicated ledgers identified when it was not expected", 0,
483+
urLedgerList.size());
484+
485+
// restart the bookie we shut down above
486+
bs.add(startBookie(conf1));
487+
488+
// Now to simulate the rolling upgrade, bring down a bookie different from
489+
// the one we brought down/up above.
490+
String shutdownBookie2 = shutDownNonAuditorBookie(shutdownBookie1);
491+
492+
// since the first bookie that was brought down/up has come up, there is only
493+
// one bookie down at this time. Hence the lost bookie check shouldn't start
494+
// immediately; it will start 5 seconds after the second bookie went down
495+
assertFalse("audit of lost bookie isn't delayed", underReplicaLatch.await(2, TimeUnit.SECONDS));
496+
assertEquals("under replicated ledgers identified when it was not expected", 0,
497+
urLedgerList.size());
498+
499+
// wait for a total of 6 seconds(2+4) for the ledgers to get reported as under replicated
500+
Thread.sleep(4000);
501+
502+
// If the following checks pass, it means that auditing happened
503+
// after lostBookieRecoveryDelay during rolling upgrade as expected
504+
assertTrue("Ledger is not marked as underreplicated:" + ledgerId,
505+
urLedgerList.contains(ledgerId));
506+
Map<Long, String> urLedgerData = getUrLedgerData(urLedgerList);
507+
String data = urLedgerData.get(ledgerId);
508+
assertTrue("Bookie " + shutdownBookie1 + "wrongly listed as missing the ledger: " + data,
509+
!data.contains(shutdownBookie1));
510+
assertTrue("Bookie " + shutdownBookie2
511+
+ " is not listed in the ledger as missing replicas :" + data,
512+
data.contains(shutdownBookie2));
513+
LOG.info("*****************Test Complete");
514+
}
515+
452516
/**
453517
* Wait for ledger to be underreplicated, and to be missing all replicas specified
454518
*/
@@ -596,4 +660,22 @@ private String shutDownNonAuditorBookie() throws Exception {
596660
}
597661
return shutdownBookie(bkIndexDownBookie);
598662
}
663+
664+
private int getShutDownNonAuditorBookieIdx(String exclude) throws Exception {
665+
// shutdown bookie which is not an auditor
666+
int indexOf = bs.indexOf(getAuditorBookie());
667+
int bkIndexDownBookie = 0;
668+
for (int i = 0; i < bs.size(); i++) {
669+
if (i == indexOf || bs.get(i).getLocalAddress().toString().equals(exclude)) {
670+
continue;
671+
}
672+
bkIndexDownBookie = i;
673+
break;
674+
}
675+
return bkIndexDownBookie;
676+
}
677+
678+
private String shutDownNonAuditorBookie(String exclude) throws Exception {
679+
return shutdownBookie(getShutDownNonAuditorBookieIdx(exclude));
680+
}
599681
}

0 commit comments

Comments
 (0)