Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 124 additions & 0 deletions internal/controller/scripts/readiness.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#!/bin/bash

# Valkey Node Readiness Check for Kubernetes
# Exits 0 (ready) if any of these conditions are met:
# 1. Cluster state is "ok"
# 2. Node has zero slots allocated
# 3. Node doesn't know about any other nodes (single node)
# 4. Valkey's uptime_in_seconds has reached TIMEOUT_SECONDS (default 300)
# Exits 1 (not ready) when none of the conditions hold.

# Trace every command as it runs (bash xtrace).
# NOTE(review): tracing stays on for every probe run - confirm this is intended.
set -x

# Load shared helpers. valkey_cli, used by every check below, is not defined
# in this script and is presumably provided by utils.sh - verify there.
# shellcheck source=./utils.sh
. /scripts/utils.sh

# Configuration
# Uptime threshold (seconds) for condition 4; an environment value overrides
# the default of 300.
TIMEOUT_SECONDS="${TIMEOUT_SECONDS:-300}"

# Check condition 4: Valkey has been up for at least TIMEOUT_SECONDS.
#
# Queries `INFO server` on 127.0.0.1:6379 through valkey_cli and reads the
# uptime_in_seconds field.
#
# Globals:
#   TIMEOUT_SECONDS - threshold in seconds (falls back to 300 when unset/empty)
# Outputs:
#   stdout - a "passed" message when the threshold is reached
#   stderr - a warning when the uptime or the threshold is unusable
# Returns:
#   0 when uptime >= threshold; 1 otherwise (including any lookup failure)
check_timeout() {
    local timeout="${TIMEOUT_SECONDS:-300}"

    local info_output
    info_output=$(valkey_cli 127.0.0.1 6379 -t 1 -c INFO server 2>/dev/null || echo "")

    # Extract uptime_in_seconds from INFO output; drop any carriage return so
    # the value can be used in an integer comparison.
    local uptime
    uptime=$(echo "$info_output" | grep "^uptime_in_seconds:" | cut -d: -f2 | tr -d '\r')

    # Reject empty or non-numeric values up front instead of letting `[ -ge ]`
    # fail with "integer expression expected".
    case "$uptime" in
        '' | *[!0-9]*)
            echo "Warning: Could not retrieve Valkey uptime" >&2
            return 1
            ;;
    esac

    case "$timeout" in
        '' | *[!0-9]*)
            echo "Warning: TIMEOUT_SECONDS is not a non-negative integer: ${timeout}" >&2
            return 1
            ;;
    esac

    # Both operands are quoted so word splitting cannot alter the test.
    if [ "$uptime" -ge "$timeout" ]; then
        echo "Readiness check passed: Valkey uptime of ${uptime}s exceeds timeout of ${timeout}s"
        return 0
    fi
    return 1
}

# Check condition 1: the local node reports a healthy cluster.
#
# Runs `CLUSTER INFO` against 127.0.0.1:6379 via valkey_cli and looks for the
# text "cluster_state:ok" anywhere in the reply.
#
# Returns:
#   0 (and prints a "passed" message) when the marker is present; 1 otherwise
check_cluster_state() {
    local reply
    # A failing call simply yields whatever it printed (possibly nothing).
    reply=$(valkey_cli 127.0.0.1 6379 -t 1 -c CLUSTER INFO 2>/dev/null || true)

    case "$reply" in
        *cluster_state:ok*)
            echo "Readiness check passed: cluster state is ok"
            return 0
            ;;
    esac

    return 1
}

# Check condition 2: the local node owns no hash slots.
#
# Runs `CLUSTER NODES` against 127.0.0.1:6379 via valkey_cli and inspects the
# line describing this node. Each line has eight fixed, space-separated fields
#   <id> <ip:port@cport> <flags> <master> <ping-sent> <pong-recv> <config-epoch> <link-state>
# followed by zero or more slot entries (single slots, ranges, or bracketed
# migrating/importing entries). The node therefore has no slots exactly when
# its line has no ninth field.
#
# The previous implementation searched the whole line for digits, which always
# matched (node ID, address and epochs all contain digits), so this check
# could never pass.
#
# Outputs:
#   stdout - a "passed" message when the node has no slots
#   stderr - a warning when no line flagged "myself" is found
# Returns:
#   0 when the current node has zero slot entries; 1 otherwise
check_slots() {
    local nodes_info
    nodes_info=$(valkey_cli 127.0.0.1 6379 -t 1 -c CLUSTER NODES 2>/dev/null || echo "")

    # Find the current node: the line whose flags field (3rd) lists "myself".
    local line
    local found=0
    local -a fields=()
    while IFS= read -r line; do
        line=${line%$'\r'}
        read -r -a fields <<< "$line"
        # Wrap in commas so "myself" is matched as a whole flag.
        case ",${fields[2]:-}," in
            *,myself,*)
                found=1
                break
                ;;
        esac
    done <<< "$nodes_info"

    if [ "$found" -ne 1 ]; then
        echo "Warning: Could not find current node in cluster nodes output" >&2
        return 1
    fi

    # Anything beyond the eight fixed fields is a slot entry.
    if [ "${#fields[@]}" -le 8 ]; then
        echo "Readiness check passed: node has zero slots allocated"
        return 0
    fi

    return 1
}

# Check condition 3: the local node knows about no other nodes.
#
# Runs `CLUSTER NODES` against 127.0.0.1:6379 via valkey_cli; the reply holds
# one line per known node, so a one-line reply means only this node is known.
#
# Outputs:
#   stdout - a "passed" message when exactly one line is returned
#   stderr - a warning when the reply is empty
# Returns:
#   0 when the reply is exactly one line; 1 otherwise
check_single_node() {
    local listing
    listing=$(valkey_cli 127.0.0.1 6379 -t 1 -c CLUSTER NODES 2>/dev/null || true)

    # Nothing came back: the node list is unavailable.
    if [ "${#listing}" -eq 0 ]; then
        echo "Warning: Could not retrieve cluster nodes information" >&2
        return 1
    fi

    # One output line per known node.
    local line_total
    line_total=$(echo "$listing" | wc -l)

    if [ "$line_total" -ne 1 ]; then
        return 1
    fi

    echo "Readiness check passed: node doesn't know about any other nodes"
    return 0
}

# Entry point: run the readiness conditions in order and exit on the first hit.
#
# Order: timeout elapsed (4), cluster state ok (1), single node (3),
# zero slots allocated (2). Later checks are not run once one succeeds.
#
# Exits 0 as soon as any check returns success; otherwise prints a failure
# message and exits 1. This function never returns to its caller.
main() {
    local probe
    for probe in check_timeout check_cluster_state check_single_node check_slots; do
        if "$probe"; then
            exit 0
        fi
    done

    # No condition held
    echo "Readiness check failed: waiting for cluster state ok, zero slots, single node, or timeout"
    exit 1
}

# Run the checks. main ends the script itself: exit 0 when a readiness
# condition holds, exit 1 otherwise.
main
6 changes: 6 additions & 0 deletions internal/controller/valkeycluster_controller_configmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,18 @@ func (r *ValkeyClusterReconciler) upsertConfigMap(ctx context.Context, valkeyClu
logger.Error(err, "failed to read utils.sh")
return "", err
}
readiness, err := scripts.ReadFile("scripts/readiness.sh")
if err != nil {
logger.Error(err, "failed to read readiness.sh")
return "", err
}
ls := labelsForValkeyCluster(valkeyCluster.Name)
cmData := map[string]string{
"pre_stop.sh": string(preStop),
"post_start.sh": string(postStart),
"meet.sh": string(meet),
"utils.sh": string(utils),
"readiness.sh": string(readiness),
}
valkeyConfContent, err := getValkeyConfigContent(valkeyCluster)
if err != nil {
Expand Down
12 changes: 8 additions & 4 deletions internal/controller/valkeycluster_controller_statefulset.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,11 +169,15 @@ func (r *ValkeyClusterReconciler) statefulSet(name string, size int32, valkeyClu
},
ReadinessProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
TCPSocket: &corev1.TCPSocketAction{
Port: intstr.FromInt(VALKEY_PORT),
Exec: &corev1.ExecAction{
Command: []string{"/bin/bash", "/scripts/readiness.sh"},
},
},
InitialDelaySeconds: valkeyCluster.Spec.InitialDelaySeconds,
TimeoutSeconds: 5,
PeriodSeconds: 10,
SuccessThreshold: 1,
FailureThreshold: 100,
},
LivenessProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
Expand Down Expand Up @@ -533,8 +537,8 @@ func (r *ValkeyClusterReconciler) compareActualToDesiredStatefulSet(ctx context.
log.Info(fmt.Sprintf("StatefulSet %s Env is different: %s", stsName, cmp.Diff(actual.Spec.Template.Spec.Containers[0].Env, desired.Spec.Template.Spec.Containers[0].Env)))
diff = true
}
if !cmp.Equal(actual.Spec.Template.Spec.Containers[0].ReadinessProbe.InitialDelaySeconds, desired.Spec.Template.Spec.Containers[0].ReadinessProbe.InitialDelaySeconds) {
log.Info(fmt.Sprintf("StatefulSet %s ReadinessProbe.InitialDelaySeconds is different: %s", stsName, cmp.Diff(actual.Spec.Template.Spec.Containers[0].ReadinessProbe.InitialDelaySeconds, desired.Spec.Template.Spec.Containers[0].ReadinessProbe.InitialDelaySeconds)))
if !cmp.Equal(actual.Spec.Template.Spec.Containers[0].ReadinessProbe, desired.Spec.Template.Spec.Containers[0].ReadinessProbe) {
log.Info(fmt.Sprintf("StatefulSet %s ReadinessProbe is different: %s", stsName, cmp.Diff(actual.Spec.Template.Spec.Containers[0].ReadinessProbe, desired.Spec.Template.Spec.Containers[0].ReadinessProbe)))
diff = true
}

Expand Down