Skip to content

Commit 12ff016

Browse files
committed
Fix cell0 access while cell0 waits for DB
There was a bug in the nova-controller in the ensureCell loop. If the cell0 was still waiting for the DB to be created but cell1 had all the prerequisites are done (DB, MQ) then the code tried to look up cell0 which was not in the internal cell map causing panic.
1 parent 333c943 commit 12ff016

File tree

3 files changed

+47
-34
lines changed

3 files changed

+47
-34
lines changed

controllers/nova_controller.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -392,9 +392,10 @@ func (r *NovaReconciler) Reconcile(ctx context.Context, req ctrl.Request) (resul
392392
}
393393

394394
// The cell0 is always handled first in the loop as we iterate on
395-
// orderedCellNames. So for any other cells we can get cell0 from cells
396-
// as it is already there.
397-
if cellName != Cell0Name && cellTemplate.HasAPIAccess && !cells[Cell0Name].IsReady() {
395+
// orderedCellNames. So for any other cells we can assume that if cell0
396+
// is not in the list then cell0 is not ready
397+
cell0Ready := (cells[Cell0Name] != nil && cells[Cell0Name].IsReady())
398+
if cellName != Cell0Name && cellTemplate.HasAPIAccess && !cell0Ready {
398399
allCellsReady = false
399400
skippedCells = append(skippedCells, cellName)
400401
util.LogForObject(

test/functional/base_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -533,6 +533,7 @@ func GetCellNames(novaName types.NamespacedName, cell string) CellNames {
533533
}
534534

535535
type NovaNames struct {
536+
Namespace string
536537
NovaName types.NamespacedName
537538
InternalNovaServiceName types.NamespacedName
538539
PublicNovaServiceName types.NamespacedName
@@ -599,7 +600,8 @@ func GetNovaNames(novaName types.NamespacedName, cellNames []string) NovaNames {
599600
}
600601

601602
return NovaNames{
602-
NovaName: novaName,
603+
Namespace: novaName.Namespace,
604+
NovaName: novaName,
603605
InternalNovaServiceName: types.NamespacedName{ // TODO replace for nova-internal
604606
Namespace: novaName.Namespace,
605607
Name: novaName.Name + "-internal",

test/functional/nova_multicell_test.go

Lines changed: 40 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -652,55 +652,65 @@ var _ = Describe("Nova multicell", func() {
652652
})
653653
When("cell1 DB and MQ create finishes before cell0 DB create", func() {
654654
BeforeEach(func() {
655-
DeferCleanup(k8sClient.Delete, ctx, CreateNovaSecret(namespace, SecretName))
655+
DeferCleanup(k8sClient.Delete, ctx, CreateNovaSecret(novaNames.Namespace, SecretName))
656656
DeferCleanup(
657657
k8sClient.Delete,
658658
ctx,
659-
CreateNovaMessageBusSecret(namespace, "mq-for-cell1-secret"),
659+
CreateNovaMessageBusSecret(cell0.CellName.Namespace, fmt.Sprintf("%s-secret", cell0.TransportURLName.Name)),
660660
)
661661
serviceSpec := corev1.ServiceSpec{Ports: []corev1.ServicePort{{Port: 3306}}}
662-
DeferCleanup(th.DeleteDBService, th.CreateDBService(namespace, "db-for-api", serviceSpec))
663-
DeferCleanup(th.DeleteDBService, th.CreateDBService(namespace, "db-for-cell1", serviceSpec))
662+
DeferCleanup(
663+
th.DeleteDBService,
664+
th.CreateDBService(
665+
novaNames.APIMariaDBDatabaseName.Namespace,
666+
novaNames.APIMariaDBDatabaseName.Name,
667+
serviceSpec))
668+
DeferCleanup(
669+
th.DeleteDBService,
670+
th.CreateDBService(
671+
cell1.MariaDBDatabaseName.Namespace,
672+
cell1.MariaDBDatabaseName.Name,
673+
serviceSpec))
664674

665675
spec := GetDefaultNovaSpec()
666-
cell0 := GetDefaultNovaCellTemplate()
667-
cell0["cellName"] = "cell0"
668-
cell0["cellDatabaseInstance"] = "db-for-api"
669-
cell0["cellDatabaseUser"] = "nova_cell0"
676+
cell0Template := GetDefaultNovaCellTemplate()
677+
cell0Template["cellDatabaseInstance"] = cell0.MariaDBDatabaseName.Name
678+
cell0Template["cellDatabaseUser"] = "nova_cell0"
670679

671-
cell1 := GetDefaultNovaCellTemplate()
672-
cell1["cellName"] = "cell1"
673-
cell1["cellDatabaseInstance"] = "db-for-cell1"
674-
cell1["cellDatabaseUser"] = "nova_cell1"
675-
cell1["cellMessageBusInstance"] = "mq-for-cell1"
680+
cell1Template := GetDefaultNovaCellTemplate()
681+
cell1Template["cellDatabaseInstance"] = cell1.MariaDBDatabaseName.Name
682+
cell1Template["cellDatabaseUser"] = "nova_cell1"
683+
cell1Template["cellMessageBusInstance"] = cell1.TransportURLName.Name
676684

677685
spec["cellTemplates"] = map[string]interface{}{
678-
"cell0": cell0,
679-
"cell1": cell1,
686+
"cell0": cell0Template,
687+
"cell1": cell1Template,
680688
}
681-
spec["apiDatabaseInstance"] = "db-for-api"
682-
spec["apiMessageBusInstance"] = "mq-for-api"
689+
spec["apiDatabaseInstance"] = novaNames.APIMariaDBDatabaseName.Name
690+
spec["apiMessageBusInstance"] = cell0.TransportURLName.Name
683691

684-
DeferCleanup(DeleteInstance, CreateNova(novaName, spec))
685-
keystoneAPIName := th.CreateKeystoneAPI(namespace)
692+
DeferCleanup(th.DeleteInstance, CreateNova(novaNames.NovaName, spec))
693+
keystoneAPIName := th.CreateKeystoneAPI(novaNames.Namespace)
686694
DeferCleanup(th.DeleteKeystoneAPI, keystoneAPIName)
687695
keystoneAPI := th.GetKeystoneAPI(keystoneAPIName)
688696
keystoneAPI.Status.APIEndpoints["internal"] = "http://keystone-internal-openstack.testing"
689697
Eventually(func(g Gomega) {
690698
g.Expect(k8sClient.Status().Update(ctx, keystoneAPI.DeepCopy())).Should(Succeed())
691699
}, timeout, interval).Should(Succeed())
692-
th.SimulateKeystoneServiceReady(novaKeystoneServiceName)
700+
th.SimulateKeystoneServiceReady(novaNames.KeystoneServiceName)
693701
})
694702

695-
// We cannot merge this test as is because if the tests run
696-
// in parallel then this test hangs forever (not even the global
697-
// ginkgo --timeout stops it). If you run it with --procs 1 locally
698-
// then it shows the panic and terminates
699-
// It("panics", func(ctx SpecContext) {
700-
// th.SimulateMariaDBDatabaseCompleted(cell1.MariaDBDatabaseName)
701-
// th.SimulateTransportURLReady(cell1.TransportURLName)
702-
703-
// // BUG: the nova-controller-manager panics but it should not
704-
// })
703+
It("waits for cell0 DB to be created", func(ctx SpecContext) {
704+
th.SimulateMariaDBDatabaseCompleted(cell1.MariaDBDatabaseName)
705+
th.SimulateTransportURLReady(cell1.TransportURLName)
706+
// NOTE(gibi): before the fix https://github.com/openstack-k8s-operators/nova-operator/pull/356
707+
// nova-controller panic at this point and test would hang
708+
th.ExpectCondition(
709+
novaNames.NovaName,
710+
ConditionGetterFunc(NovaConditionGetter),
711+
novav1.NovaAllCellsReadyCondition,
712+
corev1.ConditionFalse,
713+
)
714+
})
705715
})
706716
})

0 commit comments

Comments
 (0)