|
| 1 | +#!/bin/bash |
| 2 | + |
| 3 | +# Valkey Node Readiness Check for Kubernetes |
| 4 | +# Returns 0 (ready) if any of these conditions are met: |
| 5 | +# 1. Cluster state is "ok" |
| 6 | +# 2. Node has zero slots allocated |
| 7 | +# 3. Valkey has been up for at least TIMEOUT_SECONDS seconds (default 300) |
| 8 | + |
# Trace every command as it is executed.
set -o xtrace

# Load shared helpers. The check functions in this script call valkey_cli,
# which is not defined in this file (presumably provided by utils.sh).
# shellcheck source=./utils.sh
source /scripts/utils.sh

# Configuration: keep a caller-supplied TIMEOUT_SECONDS, otherwise use 300.
: "${TIMEOUT_SECONDS:=300}"
| 16 | + |
#######################################
# Check whether Valkey has been running for at least TIMEOUT_SECONDS.
# Globals:
#   TIMEOUT_SECONDS (read) - threshold in seconds; 300 is used when it is
#                            unset or empty.
# Outputs:
#   A success message on stdout when the threshold is reached; warnings on
#   stderr when the threshold or the uptime cannot be used.
# Returns:
#   0 if uptime_in_seconds >= TIMEOUT_SECONDS, 1 otherwise (including when
#   the uptime cannot be read or either value is not a non-negative integer).
#######################################
check_timeout() {
  local timeout="${TIMEOUT_SECONDS:-300}"

  # Reject anything that is not a plain non-negative integer. Without this, a
  # malformed value would either make the numeric test error out silently or,
  # if expanded unquoted, be parsed as additional test operators.
  case "$timeout" in
    '' | *[!0-9]*)
      echo "Warning: TIMEOUT_SECONDS must be a non-negative integer, got '${timeout}'" >&2
      return 1
      ;;
  esac

  # A failed or timed-out query is treated the same as an empty reply.
  local info_output
  info_output=$(valkey_cli 127.0.0.1 6379 -t 1 -c INFO server 2>/dev/null || echo "")

  # Extract uptime_in_seconds from INFO output; tr strips the carriage
  # returns that terminate each line of the reply.
  local uptime
  uptime=$(echo "$info_output" | grep "^uptime_in_seconds:" | cut -d: -f2 | tr -d '\r')

  # Empty (no reply / field missing) or non-numeric values cannot be compared.
  case "$uptime" in
    '' | *[!0-9]*)
      echo "Warning: Could not retrieve Valkey uptime" >&2
      return 1
      ;;
  esac

  if [ "$uptime" -ge "$timeout" ]; then
    echo "Readiness check passed: Valkey uptime of ${uptime}s exceeds timeout of ${timeout}s"
    return 0
  fi
  return 1
}
| 37 | + |
#######################################
# Report whether the local node sees the cluster as healthy.
# Outputs:
#   A success message on stdout when the CLUSTER INFO reply contains
#   "cluster_state:ok".
# Returns:
#   0 if the reply contains "cluster_state:ok", 1 otherwise (including when
#   the query fails or returns nothing).
#######################################
check_cluster_state() {
  local reply
  # Errors from the client are discarded; whatever was printed is kept.
  reply=$(valkey_cli 127.0.0.1 6379 -t 1 -c CLUSTER INFO 2>/dev/null) || true

  case "$reply" in
    *cluster_state:ok*)
      echo "Readiness check passed: cluster state is ok"
      return 0
      ;;
  esac

  return 1
}
| 49 | + |
#######################################
# Check whether the local node currently has no slot entries.
#
# Each CLUSTER NODES line has eight fixed fields
#   <id> <ip:port@cport> <flags> <master> <ping-sent> <pong-recv>
#   <config-epoch> <link-state>
# followed by zero or more slot entries (single slots, ranges, or bracketed
# migrating/importing markers). The local node is the line whose flags field
# contains "myself".
#
# Searching the whole line for digits cannot work here: the node id, address
# and epoch fields always contain digits, so only fields 9 and beyond are
# inspected.
#
# Outputs:
#   A success message on stdout when the node has no slot entries; a warning
#   on stderr when the local node cannot be found in the reply.
# Returns:
#   0 if the local node's line has no slot entries, 1 otherwise (including
#   when the query fails or the local node is not listed).
#######################################
check_slots() {
  local nodes_info
  nodes_info=$(valkey_cli 127.0.0.1 6379 -t 1 -c CLUSTER NODES 2>/dev/null || echo "")

  # Find the current node: the line whose flags field (3rd) lists "myself".
  local line
  local found=0
  local -a fields=()
  while IFS= read -r line; do
    line=${line%$'\r'}
    IFS=' ' read -r -a fields <<<"$line"
    # Wrap in commas so "myself" only matches as a whole flag.
    case ",${fields[2]:-}," in
      *,myself,*)
        found=1
        break
        ;;
    esac
  done <<<"$nodes_info"

  if [ "$found" -eq 0 ]; then
    echo "Warning: Could not find current node in cluster nodes output" >&2
    return 1
  fi

  # Anything beyond the eight fixed fields is a slot entry.
  if [ "${#fields[@]}" -le 8 ]; then
    echo "Readiness check passed: node has zero slots allocated"
    return 0
  fi

  return 1
}
| 73 | + |
#######################################
# Readiness entry point: run each check in turn and exit as soon as one passes.
# Order of evaluation:
#   1. check_timeout        (condition 3: timeout elapsed)
#   2. check_cluster_state  (condition 1: cluster state ok)
#   3. check_slots          (condition 2: zero slots allocated)
# Outputs:
#   A failure message on stdout when no check passes.
# Exits:
#   0 as soon as any check succeeds, 1 if none does. This function never
#   returns to its caller.
#######################################
main() {
  local probe
  for probe in check_timeout check_cluster_state check_slots; do
    if "$probe"; then
      exit 0
    fi
  done

  # None of the conditions met
  echo "Readiness check failed: waiting for cluster state ok, zero slots, or timeout"
  exit 1
}
| 95 | + |
| 96 | +main |
0 commit comments