docs/runbook/change-citus-node-pool-machine-type.md (1 addition, 1 deletion)

```diff
@@ -6,7 +6,7 @@ Need to Change Machine Type for Citus Node Pool(s)
 
 ## Prerequisites
 
-- Have `jq` installed
+- Have `jq` and `yq` installed
 - kubectl is pointing to the cluster you want to change the machine type for
 - All bash commands assume your working directory is `docs/runbook/scripts`
```
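The prerequisite updates in this and the following runbooks all add `yq` alongside `jq`. For reference, one way to install both (assuming Homebrew; other package managers will differ):

```bash
# Assumes Homebrew; the yq formula is the Go-based YAML processor.
brew install jq yq
```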
docs/runbook/create-disk-snapshot-for-citus-cluster.md (1 addition, 1 deletion)

```diff
@@ -7,7 +7,7 @@ Need to create disk snapshots for Citus cluster(s)
 ## Prerequisites
 
 - Have access to a running Citus cluster deployed by the `hedera-mirror` chart
-- Have `jq` installed
+- Have `jq` and `yq` installed
 - All bash commands assume your working directory is `docs/runbook/scripts`
 - The kubectl context is set to the cluster you want to create snapshots from
```
docs/runbook/reduce-disksize-citus.md (1 addition, 1 deletion)

```diff
@@ -5,7 +5,7 @@ Citus disks are over provisioned and need to be reduced in size.
 ## Prerequisites
 
 - All Citus PVCS are defined in GiB
-- `jq` is installed
+- `jq` and `yq` is installed
 - The kubectl context is set to the cluster with the over-sized disk
 - Need to ensure that `zfs.(coordinator|worker).diskSize` is less than any disk you are reducing
 - Follow the [create snapshot](create-disk-snapshot-for-citus-cluster.md) runbook to create a snapshot for cluster before running this runbook
```
docs/runbook/restore-citus-from-disk-snapshot.md (1 addition, 1 deletion)

```diff
@@ -7,7 +7,7 @@ Need to restore Citus cluster from disk snapshots
 ## Prerequisites
 
 - Snapshots of disks were created by following the [create snapshot](create-disk-snapshot-for-citus-cluster.md) runbook
-- Have `jq` and `ksd`(kubernetes secret decrypter) installed
+- Have `jq`, `yq`, and `ksd`(kubernetes secret decrypter) installed
 - The snapshots are from a compatible version of `postgres`
 - The `target cluster` has a running Citus cluster deployed with `hedera-mirror` chart
 - The `target cluster` you are restoring to doesn't have any pvcs with a size larger than the size of the pvc in the
```
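Assuming `ksd` refers to the open-source Kubernetes secret decoder (an assumption; install whichever implementation your team standardizes on), a typical setup and use:

```bash
# Assumption: the Go implementation at github.com/mfuentesg/ksd.
go install github.com/mfuentesg/ksd@latest
# Hypothetical secret name, for illustration; ksd base64-decodes the data fields.
kubectl get secret my-citus-credentials -o json | ksd
```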
docs/runbook/restore-citus-from-stackgres-backup.md (1 addition, 1 deletion)

```diff
@@ -6,7 +6,7 @@ Need to restore Citus cluster from a StackGres sharded backup
 
 ## Prerequisites
 
-- Have `jq` and `ksd`(kubernetes secret decrypter) installed
+- Have `jq`, `yq`, and `ksd`(kubernetes secret decrypter) installed
 - The cluster has a running Citus cluster deployed with `hedera-mirror` chart
 - StackGresShardedCluster backup is enabled
 - All bash commands assume your working directory is `docs/runbook/scripts`
```
docs/runbook/scripts/restore-volume-snapshot.sh (1 addition, 1 deletion)

```diff
@@ -123,7 +123,7 @@ function prepareDiskReplacement() {
   for namespace in "${NAMESPACES[@]}"; do
     unrouteTraffic "${namespace}"
     kubectl delete sgshardedbackups.stackgres.io -n "${namespace}" --all
-    pauseCitus "${namespace}"
+    pauseCitus "${namespace}" "true"
   done
 
   # Spin down existing citus node pools
```
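The new second argument to `pauseCitus` is defined in `utils.sh`, which this diff does not show. As a generic sketch of the bash idiom such an optional parameter typically uses (hypothetical body and flag name, not the real function):

```bash
function pauseCitus() {
  local namespace="$1"
  local someFlag="${2:-false}" # hypothetical name; defaults to "false" when omitted
  echo "Pausing Citus in ${namespace} (flag=${someFlag})"
}

pauseCitus "mainnet"          # someFlag=false
pauseCitus "mainnet" "true"   # someFlag=true
```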
docs/runbook/scripts/upgrade-k8s-version-citus.sh (79 additions, 26 deletions)
```diff
@@ -6,6 +6,30 @@ set -euo pipefail
 
 source ./utils.sh
 
+versionGreater() {
+  local raw1="$1"
+  local raw2="$2"
+
+  local core1="${raw1%%-*}"
+  local core2="${raw2%%-*}"
+
+  if [[ "$(printf '%s\n' "$core1" "$core2" | sort -V | head -n1)" != "$core1" ]]; then
+    return 0
+  elif [[ "$core1" == "$core2" ]]; then
+    local build1 build2
+    build1="$(echo "$raw1" | sed -n 's/.*gke\.//p')"
+    build2="$(echo "$raw2" | sed -n 's/.*gke\.//p')"
+
+    if [[ -n "$build1" && -n "$build2" ]]; then
+      if [[ "$build1" -gt "$build2" ]]; then
+        return 0
+      fi
+    fi
+  fi
+
+  return 1
+}
+
 NAMESPACES=($(kubectl get sgshardedclusters.stackgres.io -A -o jsonpath='{.items[*].metadata.namespace}'))
 
 GCP_PROJECT="$(readUserInput "Enter GCP Project for target: ")"
```
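For intuition, `versionGreater` orders the semver core with `sort -V` and, when the cores are equal, falls back to comparing the numeric `gke.<build>` suffix. With made-up version strings:

```bash
versionGreater "1.30.1-gke.100" "1.29.8-gke.200" && echo greater      # core 1.30.1 > 1.29.8
versionGreater "1.29.8-gke.200" "1.29.8-gke.100" && echo greater      # same core, build 200 > 100
versionGreater "1.29.8-gke.100" "1.29.8-gke.100" || echo not-greater  # equal versions return 1
```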
```diff
@@ -38,7 +62,12 @@ VERSION="$(readUserInput "Enter the new Kubernetes version: ")"
 if [[ -z "${VERSION}" ]]; then
   log "VERSION is not set and is required. Exiting"
   exit 1
-else
+fi
+
+UPGRADE_MASTER="$(readUserInput "Do you want to upgrade the master to ${VERSION}? (y/n): ")"
+UPGRADE_MASTER=$(echo "${UPGRADE_MASTER}" | tr '[:upper:]' '[:lower:]')
+
+if [[ "${UPGRADE_MASTER}" == "yes" || "${UPGRADE_MASTER}" == "y" ]]; then
   log "Checking if version ${VERSION} is valid for the cluster master"
   MASTER_SUPPORTED=$(gcloud container get-server-config \
     --location="${GCP_K8S_CLUSTER_REGION}" \
@@ -51,19 +80,29 @@ else
     exit 1
   fi
 
-  log "Checking if version ${VERSION} is valid for node pools..."
-  POOLS_SUPPORTED=$(gcloud container get-server-config \
-    --location="${GCP_K8S_CLUSTER_REGION}" \
+  CURRENT_MASTER_VERSION=$(gcloud container clusters describe "${GCP_K8S_CLUSTER_NAME}" \
+    --region="${GCP_K8S_CLUSTER_REGION}" \
     --project="${GCP_PROJECT}" \
-    --format="json(validNodeVersions)" |
-    jq -r --arg VERSION "${VERSION}" 'any(.validNodeVersions[]; . == $VERSION)')
+    --format="value(currentMasterVersion)")
 
-  if [[ "${POOLS_SUPPORTED}" != "true" ]]; then
-    log "Node pool '${pool}' does not support version ${VERSION}. Exiting."
+  if ! versionGreater "${VERSION}" "${CURRENT_MASTER_VERSION}"; then
+    log "Version ${VERSION} must be greater than the current master version ${CURRENT_MASTER_VERSION}. Exiting."
     exit 1
   fi
+fi
+
+log "Checking if version ${VERSION} is valid for node pools..."
+POOLS_SUPPORTED=$(gcloud container get-server-config \
+  --location="${GCP_K8S_CLUSTER_REGION}" \
+  --project="${GCP_PROJECT}" \
+  --format="json(validNodeVersions)" |
+  jq -r --arg VERSION "${VERSION}" 'any(.validNodeVersions[]; . == $VERSION)')
+
+if [[ "${POOLS_SUPPORTED}" != "true" ]]; then
+  log "Version ${VERSION} is not supported by node pools. Exiting."
+  exit 1
+fi
 
 AVAILABLE_POOLS="$(gcloud container node-pools list --project="${GCP_PROJECT}" --cluster="${GCP_K8S_CLUSTER_NAME}" --region="${GCP_K8S_CLUSTER_REGION}" --format="json(name)" | jq -r '.[].name' | tr '\n' ' ')"
 POOLS_TO_UPDATE_INPUT="$(readUserInput "Enter the node pools(${AVAILABLE_POOLS}) to update (space-separated): ")"
 if [[ -z "${POOLS_TO_UPDATE_INPUT}" ]]; then
```
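The validity checks above can be reproduced by hand, with placeholder project and region values and the same `jq` predicate the script uses:

```bash
# Placeholder project/region, for illustration only.
gcloud container get-server-config \
  --location="us-central1" \
  --project="my-project" \
  --format="json(validMasterVersions,validNodeVersions)"

# The jq any() predicate prints "true" when the candidate version is listed:
echo '{"validNodeVersions":["1.30.1-gke.200","1.29.8-gke.100"]}' |
  jq -r --arg VERSION "1.30.1-gke.200" 'any(.validNodeVersions[]; . == $VERSION)'
```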
Expand All @@ -76,22 +115,20 @@ else
done
fi

while true; do
SYSTEM_CONFIG_FILE="$(readUserInput 'Enter path to Linux config file (leave blank to skip): ')"

if [[ -z "${SYSTEM_CONFIG_FILE}" ]]; then
break
elif [[ -f "${SYSTEM_CONFIG_FILE}" ]]; then
break
else
log "File '${SYSTEM_CONFIG_FILE}' does not exist. Please enter a valid path or leave blank to skip."
fi
done

POOLS_WITH_CITUS_ROLE=()
POOLS_WITHOUT_CITUS_ROLE=()

for pool in "${POOLS_TO_UPDATE[@]}"; do
CURRENT_POOL_VERSION=$(gcloud container node-pools describe "${pool}" \
--project="${GCP_PROJECT}" \
--cluster="${GCP_K8S_CLUSTER_NAME}" \
--region="${GCP_K8S_CLUSTER_REGION}" \
--format="value(version)")

if ! versionGreater "${VERSION}" "${CURRENT_POOL_VERSION}"; then
log "Version ${VERSION} must be greater than current version ${CURRENT_POOL_VERSION} for pool ${pool}. Skipping."
exit 1
fi
LABELS_JSON=$(gcloud container node-pools describe "${pool}" \
--project="${GCP_PROJECT}" \
--cluster="${GCP_K8S_CLUSTER_NAME}" \
Expand All @@ -105,6 +142,18 @@ for pool in "${POOLS_TO_UPDATE[@]}"; do
fi
done

while true; do
SYSTEM_CONFIG_FILE="$(readUserInput 'Enter path to Linux config file (leave blank to skip): ')"

if [[ -z "${SYSTEM_CONFIG_FILE}" ]]; then
break
elif [[ -f "${SYSTEM_CONFIG_FILE}" ]]; then
break
else
log "File '${SYSTEM_CONFIG_FILE}' does not exist. Please enter a valid path or leave blank to skip."
fi
done

function upgradePool() {
local pool="$1"
log "Upgrading node pool: ${pool}"
```diff
@@ -139,12 +188,16 @@ function upgradeCitusPools() {
   done
 }
 
-log "Upgrading master to Kubernetes version ${VERSION}"
-gcloud container clusters upgrade "${GCP_K8S_CLUSTER_NAME}" \
-  --master \
-  --cluster-version="${VERSION}" \
-  --location="${GCP_K8S_CLUSTER_REGION}" \
-  --project="${GCP_PROJECT}"
+if [[ "${UPGRADE_MASTER}" == "yes" || "${UPGRADE_MASTER}" == "y" ]]; then
+  log "Upgrading master to Kubernetes version ${VERSION}"
+  gcloud container clusters upgrade "${GCP_K8S_CLUSTER_NAME}" \
+    --master \
+    --cluster-version="${VERSION}" \
+    --location="${GCP_K8S_CLUSTER_REGION}" \
+    --project="${GCP_PROJECT}"
+else
+  log "Skipping master upgrade as requested."
+fi
 
 for pool in "${POOLS_WITHOUT_CITUS_ROLE[@]}"; do
   upgradePool "${pool}"
```
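Per the runbooks' working-directory convention, the updated script would be run as:

```bash
cd docs/runbook/scripts
./upgrade-k8s-version-citus.sh
```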