Skip to content

Commit 8f4081a

Browse files
authored
Merge pull request #153 from vshn/guide/rollbacks
Add AppCat rollback guide
2 parents c37ec58 + fc4809d commit 8f4081a

File tree

2 files changed

+384
-0
lines changed

2 files changed

+384
-0
lines changed
Lines changed: 383 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,383 @@
1+
= AppCat Rollback
2+
:page-aliases: how-tos/appcat/AppCatRollback.adoc
3+
4+
For on-call engineers responding to an AppCat release issue.
5+
Use these scripts to quickly revert affected XRs to a previous CompositionRevision and restore them once the issue is resolved.
6+
7+
== Before you start
8+
9+
[IMPORTANT]
10+
====
11+
Make sure you can run `kubectl` against the cluster that runs AppCat/Crossplane.
12+
You'll need permissions to **get/list** XRDs and CompositionRevisions, and to **get/list/patch** XRs.
13+
====
14+
15+
[TIP]
16+
====
17+
If your user isn't a cluster admin, pass `--as-admin` to the scripts.
18+
They'll run all `kubectl` commands as `kubectl --as cluster-admin …` under the hood.
19+
====
20+
21+
Copy the two scripts below into files named **rollback.sh** and **unpin.sh**, then make them executable:
22+
23+
[source,bash]
24+
----
25+
# Copy/paste the contents from the "Scripts" section into these files:
26+
$EDITOR rollback.sh
27+
$EDITOR unpin.sh
28+
29+
# Make them executable (either run in-place or move onto your PATH)
30+
chmod +x rollback.sh unpin.sh
31+
# optional:
32+
# sudo mv rollback.sh /usr/local/bin/
33+
# sudo mv unpin.sh /usr/local/bin/
34+
----
35+
36+
== Scripts
37+
38+
[%collapsible]
39+
.rollback.sh
40+
====
41+
[source,bash]
42+
----
43+
#!/usr/bin/env bash
#
# rollback.sh - pin Crossplane XRs to a CompositionRevision selected through
# the "metadata.appcat.vshn.io/revision" label.

# Strict mode: abort on unhandled errors (-e), on unset variables (-u) and
# when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Extra kubectl arguments; the --as-admin option sets this to
# "--as cluster-admin".
AS_ADMIN_FLAG=""

# kubectl binary to invoke; override through the KUBECTL environment variable.
KUBECTL="${KUBECTL:-kubectl}"
48+
49+
# Print the command-line help for rollback.sh on stdout.
usage() {
  # Quoted delimiter: the help text is emitted literally, without expansion.
  cat <<'EOF'
Usage: rollback.sh [options]
-t TYPE XR type (CRD name). Comma-separated allowed.
-i NAME Specific XR instance (only with a single -t).
--all-instances Operate on all instances of the given TYPE(s).
--all-types Operate on all instances across all XRD types.
-r REVISION Explicit revision label (e.g. v3.52.0-v4.166.0).
--as-admin Run kubectl commands as cluster-admin.
-h|--help Show help.
EOF
}
60+
61+
# Report an error on stderr (prefixed with "ERROR: ") and exit with status 1.
fatal() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}
62+
# Emit a warning on stderr (prefixed with "WARN: "); execution continues.
warn() {
  printf 'WARN: %s\n' "$*" >&2
}
63+
64+
# Run kubectl with the optional impersonation flags followed by the caller's
# arguments.
# Globals:   KUBECTL (read; falls back to "kubectl"), AS_ADMIN_FLAG (read)
# Arguments: passed through to kubectl unchanged
# Returns:   the exit status of the kubectl invocation
kubectl_cmd() {
  local -a impersonate=()
  if [ -n "${AS_ADMIN_FLAG:-}" ]; then
    # Split the flag string on blanks explicitly, so the result depends
    # neither on the caller's IFS nor on pathname expansion.
    IFS=$' \t' read -r -a impersonate <<<"$AS_ADMIN_FLAG"
  fi
  # The ${arr[@]+...} form keeps an empty array safe under `set -u` on older
  # bash releases.
  "${KUBECTL:-kubectl}" ${impersonate[@]+"${impersonate[@]}"} "$@"
}
67+
68+
# List the names of all XRDs in the cluster, one per line, without blank lines.
# Outputs: XRD names on stdout
# Returns: the status of the kubectl | sed pipeline
discover_all_types() {
  kubectl_cmd get xrd \
    -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' \
    | sed '/^$/d'
}
71+
72+
# List the names of all instances of one resource type, one per line.
# Arguments: $1 - resource type to query
# Outputs:   instance names on stdout
# Returns:   0 on success, 1 if kubectl fails
list_instances() {
  kubectl_cmd get "$1" \
    -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' \
    || return 1
}
75+
76+
# Find the revision label of the CompositionRevision that precedes the one an
# XR currently references.
# Arguments: $1 - XR type, $2 - XR name
# Outputs:   the value of the "metadata.appcat.vshn.io/revision" label of the
#            previous CompositionRevision, on stdout
# Exits:     via fatal (status 1) when the current revision, its base name,
#            a previous revision or that revision's label cannot be determined
autodetect_prev_label() {
  local xr_type="$1" xr_name="$2" current base previous label

  # Revision the XR references right now.
  current="$(kubectl_cmd get "$xr_type" "$xr_name" \
    -o jsonpath='{.spec.compositionRevisionRef.name}' || true)"
  [ -n "$current" ] || fatal "$xr_type/$xr_name lacks .spec.compositionRevisionRef.name"

  # Strip the trailing "-<hex>" suffix to obtain the shared base name.
  base="$(printf '%s' "$current" | sed -E 's/-[0-9a-f]{7,}$//')" || true
  [ -n "$base" ] || fatal "Cannot derive base from '$current' for $xr_type/$xr_name"

  # Walk the revisions oldest-to-newest and print the sibling that directly
  # precedes the current one. A sibling must be "<base>-<hex>"; a bare prefix
  # match would also accept revisions of another composition whose name
  # merely starts with "<base>-".
  previous="$(
    kubectl_cmd get compositionrevisions.apiextensions.crossplane.io \
      --sort-by=.metadata.creationTimestamp -o name \
      | sed 's|.*/||' \
      | awk -v b="$base" -v c="$current" '
          index($0, b "-") == 1 && substr($0, length(b) + 2) ~ /^[0-9a-f]+$/ {
            revs[++count] = $0
          }
          END {
            for (i = 2; i <= count; i++) if (revs[i] == c) print revs[i - 1]
          }'
  )"
  [ -n "$previous" ] || fatal "No previous CompositionRevision for base '$base' (current=$current) on $xr_type/$xr_name"

  # go-template renders a missing map key as "<no value>", so treat that the
  # same as an empty result.
  label="$(kubectl_cmd get compositionrevision "$previous" \
    -o go-template='{{ index .metadata.labels "metadata.appcat.vshn.io/revision" }}' 2>/dev/null || true)"
  if [ -z "$label" ] || [ "$label" = "<no value>" ]; then
    fatal "Previous CR '$previous' missing revision label"
  fi

  printf '%s\n' "$label"
}
94+
95+
# Pin an XR to a revision by setting spec.compositionRevisionSelector to match
# the "metadata.appcat.vshn.io/revision" label.
# Arguments: $1 - XR type, $2 - XR name, $3 - revision label value
# Outputs:   a progress line on stdout, followed by kubectl's own output
# Returns:   the exit status of the kubectl patch call
patch_xr() {
  local xr_type="$1" xr_name="$2" revision="$3"
  local bs='\' dq='"' escaped payload

  echo "Patching $xr_type/$xr_name revision=$revision"

  # Escape backslashes first, then double quotes, so the value cannot break
  # out of the JSON string it is embedded in.
  escaped=${revision//"$bs"/$bs$bs}
  escaped=${escaped//"$dq"/$bs$dq}

  payload="{\"spec\":{\"compositionRevisionSelector\":{\"matchLabels\":{\"metadata.appcat.vshn.io/revision\":\"$escaped\"}}}}"
  kubectl_cmd patch "$xr_type" "$xr_name" --type=merge -p "$payload"
}
100+
101+
# Main flow: parse options, validate their combination, resolve the XR types
# and instances to operate on, then pin every selected XR to the requested
# (or autodetected) revision. Per-XR failures are collected in FAILED so one
# bad instance does not stop the remaining ones.

# arg parsing
[ $# -gt 0 ] || { usage; exit 1; }
ALL_TYPES=0 ALL_INST=0 TYPES_CSV="" NAME="" REV=""
while [ $# -gt 0 ]; do
  case "$1" in
    -t) TYPES_CSV="${2:?-t requires a TYPE}"; shift 2 ;;
    -i) NAME="${2:?-i requires a NAME}"; shift 2 ;;
    -r) REV="${2:?-r requires a REVISION}"; shift 2 ;;
    --all-instances) ALL_INST=1; shift ;;
    --all-types) ALL_TYPES=1; shift ;;
    --as-admin) AS_ADMIN_FLAG="--as cluster-admin"; shift ;;
    -h|--help) usage; exit 0 ;;
    -*) fatal "Unknown option $1" ;;
    *) fatal "Unexpected argument $1" ;;
  esac
done

# validations
[ "$ALL_TYPES" -eq 1 ] && [ -n "$TYPES_CSV" ] && fatal "Do not combine -t with --all-types"
[ "$ALL_TYPES" -eq 0 ] && [ -z "$TYPES_CSV" ] && fatal "-t is required when --all-types is not set"
[ -n "$NAME" ] && [ "$ALL_INST" -eq 1 ] && fatal "-i cannot be used with --all-instances"
[ -n "$NAME" ] && [ "$ALL_TYPES" -eq 1 ] && fatal "-i cannot be used with --all-types"
[ "$ALL_TYPES" -eq 0 ] && [ "$ALL_INST" -eq 0 ] && [ -z "$NAME" ] && fatal "Use -i or --all-instances (or --all-types)"

# build type list
types=()
if [ "$ALL_TYPES" -eq 1 ]; then
  while IFS= read -r line; do
    if [ -n "$line" ]; then
      types+=("$line")
    fi
  done < <(discover_all_types)
  [ ${#types[@]} -gt 0 ] || fatal "No Crossplane XRDs found"
else
  # Drop all whitespace so "a, b" and "a,b" are treated the same.
  TYPES_CSV=${TYPES_CSV//[[:space:]]/}
  IFS=',' read -r -a types <<<"$TYPES_CSV"
  # Guard the loop below: a whitespace-only -t value leaves the list empty.
  [ ${#types[@]} -gt 0 ] || fatal "No TYPE given to -t"
  if [ -n "$NAME" ] && [ ${#types[@]} -ne 1 ]; then
    fatal "With -i NAME you must pass exactly one TYPE"
  fi
fi

FAILED=0
for t in "${types[@]}"; do
  instances=()
  if [ -n "$NAME" ]; then
    if ! kubectl_cmd get "$t" "$NAME" >/dev/null 2>&1; then
      warn "Instance not found: $t/$NAME; skipping"
      FAILED=1; continue
    fi
    instances+=("$NAME")
  else
    # A failed list (RBAC, API error, unknown type) must not be reported as
    # "no instances": flag it and let the script exit non-zero at the end.
    if ! out="$(list_instances "$t")"; then
      warn "Failed to list instances of $t; skipping"
      FAILED=1; continue
    fi
    if [ -z "${out:-}" ]; then
      echo "No instances of $t; skipping."
      continue
    fi
    # Quote the here-string: older bash releases word-split an unquoted one,
    # which would merge all names into a single line.
    while IFS= read -r line; do
      if [ -n "$line" ]; then
        instances+=("$line")
      fi
    done <<<"$out"
  fi

  for n in "${instances[@]}"; do
    [ -n "$n" ] || { warn "Empty name for $t; skipping"; FAILED=1; continue; }
    rev="$REV"
    if [ -z "$rev" ]; then
      # Capture stdout only: merging stderr would let kubectl warnings end up
      # inside the revision label. The helper reports its own error on stderr.
      if ! rev="$(autodetect_prev_label "$t" "$n")"; then
        warn "Could not autodetect previous revision for $t/$n"; FAILED=1; continue
      fi
    fi
    if ! patch_xr "$t" "$n" "$rev"; then
      warn "Failed to patch $t/$n"; FAILED=1
    fi
  done
done

[ "$FAILED" -eq 0 ] || exit 1
echo "Done."
177+
----
178+
====
179+
180+
[%collapsible]
181+
.unpin.sh
182+
====
183+
[source,bash]
184+
----
185+
#!/usr/bin/env bash
#
# unpin.sh - remove spec.compositionRevisionSelector from Crossplane XRs.

# Strict mode: abort on unhandled errors (-e), on unset variables (-u) and
# when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Extra kubectl arguments; the --as-admin option sets this to
# "--as cluster-admin".
AS_ADMIN_FLAG=""

# kubectl binary to invoke; override through the KUBECTL environment variable.
KUBECTL="${KUBECTL:-kubectl}"
189+
190+
# Print the command-line help for unpin.sh on stdout.
usage() {
  # Quoted delimiter: the help text is emitted literally, without expansion.
  cat <<'EOF'
Usage: unpin.sh [options]
-t TYPE XR type (CRD name). Comma-separated allowed.
-i NAME Specific XR instance (only with a single -t).
--all-instances Operate on all instances of the given TYPE(s).
--all-types Operate on all instances across all XRD types.
--as-admin Run kubectl commands as cluster-admin.
-h|--help Show help.
EOF
}
200+
201+
# Report an error on stderr (prefixed with "ERROR: ") and exit with status 1.
fatal() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}
202+
# Emit a warning on stderr (prefixed with "WARN: "); execution continues.
warn() {
  printf 'WARN: %s\n' "$*" >&2
}
203+
204+
# Run kubectl with the optional impersonation flags followed by the caller's
# arguments.
# Globals:   KUBECTL (read; falls back to "kubectl"), AS_ADMIN_FLAG (read)
# Arguments: passed through to kubectl unchanged
# Returns:   the exit status of the kubectl invocation
kubectl_cmd() {
  local -a impersonate=()
  if [ -n "${AS_ADMIN_FLAG:-}" ]; then
    # Split the flag string on blanks explicitly, so the result depends
    # neither on the caller's IFS nor on pathname expansion.
    IFS=$' \t' read -r -a impersonate <<<"$AS_ADMIN_FLAG"
  fi
  # The ${arr[@]+...} form keeps an empty array safe under `set -u` on older
  # bash releases.
  "${KUBECTL:-kubectl}" ${impersonate[@]+"${impersonate[@]}"} "$@"
}
207+
208+
# List the names of all XRDs in the cluster, one per line, without blank lines.
# Outputs: XRD names on stdout
# Returns: the status of the kubectl | sed pipeline
discover_all_types() {
  kubectl_cmd get xrd \
    -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' \
    | sed '/^$/d'
}
211+
212+
# List the names of all instances of one resource type, one per line.
# Arguments: $1 - resource type to query
# Outputs:   instance names on stdout
# Returns:   0 on success, 1 if kubectl fails
list_instances() {
  kubectl_cmd get "$1" \
    -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' \
    || return 1
}
215+
216+
# Remove the revision pin from an XR.
# Arguments: $1 - XR type, $2 - XR name
# Outputs:   a progress line on stdout, followed by kubectl's own output
# Returns:   the exit status of the kubectl patch call
unpin_xr() {
  local xr_type="$1" xr_name="$2"
  echo "Unpinning $xr_type/$xr_name (removing spec.compositionRevisionSelector)"
  # The merge patch sets the selector to null to remove it from the spec.
  kubectl_cmd patch "$xr_type" "$xr_name" --type=merge \
    -p '{"spec":{"compositionRevisionSelector":null}}'
}
221+
222+
# Main flow: parse options, validate their combination, resolve the XR types
# and instances to operate on, then remove the revision selector from every
# selected XR. Per-XR failures are collected in FAILED so one bad instance
# does not stop the remaining ones.

# arg parsing
[ $# -gt 0 ] || { usage; exit 1; }
ALL_TYPES=0 ALL_INST=0 TYPES_CSV="" NAME=""
while [ $# -gt 0 ]; do
  case "$1" in
    -t) TYPES_CSV="${2:?-t requires a TYPE}"; shift 2 ;;
    -i) NAME="${2:?-i requires a NAME}"; shift 2 ;;
    --all-instances) ALL_INST=1; shift ;;
    --all-types) ALL_TYPES=1; shift ;;
    --as-admin) AS_ADMIN_FLAG="--as cluster-admin"; shift ;;
    -h|--help) usage; exit 0 ;;
    -*) fatal "Unknown option $1" ;;
    *) fatal "Unexpected argument $1" ;;
  esac
done

# validations
[ "$ALL_TYPES" -eq 1 ] && [ -n "$TYPES_CSV" ] && fatal "Do not combine -t with --all-types"
[ "$ALL_TYPES" -eq 0 ] && [ -z "$TYPES_CSV" ] && fatal "-t is required when --all-types is not set"
[ -n "$NAME" ] && [ "$ALL_INST" -eq 1 ] && fatal "-i cannot be used with --all-instances"
[ -n "$NAME" ] && [ "$ALL_TYPES" -eq 1 ] && fatal "-i cannot be used with --all-types"
[ "$ALL_TYPES" -eq 0 ] && [ "$ALL_INST" -eq 0 ] && [ -z "$NAME" ] && fatal "Use -i or --all-instances (or --all-types)"

# build type list
types=()
if [ "$ALL_TYPES" -eq 1 ]; then
  while IFS= read -r line; do
    if [ -n "$line" ]; then
      types+=("$line")
    fi
  done < <(discover_all_types)
  [ ${#types[@]} -gt 0 ] || fatal "No Crossplane XRDs found"
else
  # Drop all whitespace so "a, b" and "a,b" are treated the same.
  TYPES_CSV=${TYPES_CSV//[[:space:]]/}
  IFS=',' read -r -a types <<<"$TYPES_CSV"
  # Guard the loop below: a whitespace-only -t value leaves the list empty.
  [ ${#types[@]} -gt 0 ] || fatal "No TYPE given to -t"
  if [ -n "$NAME" ] && [ ${#types[@]} -ne 1 ]; then
    fatal "With -i NAME you must pass exactly one TYPE"
  fi
fi

FAILED=0
for t in "${types[@]}"; do
  instances=()
  if [ -n "$NAME" ]; then
    if ! kubectl_cmd get "$t" "$NAME" >/dev/null 2>&1; then
      warn "Instance not found: $t/$NAME; skipping"
      FAILED=1; continue
    fi
    instances+=("$NAME")
  else
    # A failed list (RBAC, API error, unknown type) must not be reported as
    # "no instances": flag it and let the script exit non-zero at the end.
    if ! out="$(list_instances "$t")"; then
      warn "Failed to list instances of $t; skipping"
      FAILED=1; continue
    fi
    if [ -z "${out:-}" ]; then
      echo "No instances of $t; skipping."
      continue
    fi
    # Quote the here-string: older bash releases word-split an unquoted one,
    # which would merge all names into a single line.
    while IFS= read -r line; do
      if [ -n "$line" ]; then
        instances+=("$line")
      fi
    done <<<"$out"
  fi

  for n in "${instances[@]}"; do
    [ -n "$n" ] || { warn "Empty name for $t; skipping"; FAILED=1; continue; }
    if ! unpin_xr "$t" "$n"; then
      warn "Failed to unpin $t/$n"; FAILED=1
    fi
  done
done

[ "$FAILED" -eq 0 ] || exit 1
echo "Done."
287+
----
288+
====
289+
290+
== What the rollback does
291+
292+
`rollback.sh` sets:
293+
294+
[source,yaml]
295+
----
296+
spec:
297+
compositionRevisionSelector:
298+
matchLabels:
299+
metadata.appcat.vshn.io/revision: <REV>
300+
----
301+
302+
If `-r <REV>` is not given, it autodetects the previous `CompositionRevision` relative to the XR's current revision and uses that CR's label `metadata.appcat.vshn.io/revision`.
303+
304+
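It's repeatable rather than idempotent: every run picks "previous relative to current", so running it again once the XR references the rolled-back revision steps back one more revision. To stay on one specific revision, pass `-r <REV>` instead.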
305+
306+
== Usage examples
307+
308+
*Single XR (autodetect previous)*
309+
[source,bash]
310+
----
311+
./rollback.sh -t xvshnnextclouds.vshn.appcat.vshn.io -i nextcloud-test-mg7hp
312+
----
313+
314+
*Single XR (pin to an explicit revision label)*
315+
[source,bash]
316+
----
317+
./rollback.sh -t xvshnnextclouds.vshn.appcat.vshn.io -i nextcloud-test-mg7hp -r v3.52.0-v4.166.0
318+
----
319+
320+
*All instances of one type (autodetect each)*
321+
[source,bash]
322+
----
323+
./rollback.sh -t xvshnnextclouds.vshn.appcat.vshn.io --all-instances
324+
----
325+
326+
*All types across the cluster (autodetect each)*
327+
[source,bash]
328+
----
329+
./rollback.sh --all-types
330+
----
331+
332+
*Return to automatic policy (remove the selector)*
333+
[source,bash]
334+
----
335+
./unpin.sh -t xvshnnextclouds.vshn.appcat.vshn.io -i nextcloud-test-mg7hp
336+
# or all instances of a type
337+
./unpin.sh -t xvshnnextclouds.vshn.appcat.vshn.io --all-instances
338+
# or all types
339+
./unpin.sh --all-types
340+
----
341+
342+
== Verify after patch
343+
344+
Check what the XR is pinned to and the currently referenced CR:
345+
346+
[source,bash]
347+
----
348+
TYPE=xvshnnextclouds.vshn.appcat.vshn.io
NAME=nextcloud-test-mg7hp

# jsonpath output has no trailing newline, so append one explicitly.
echo "Selector (REV label) in XR:"
kubectl get "$TYPE" "$NAME" -o jsonpath='{.spec.compositionRevisionSelector.matchLabels}{"\n"}'

# Same field rollback.sh reads to determine the current revision.
echo "Currently referenced CompositionRevision:"
kubectl get "$TYPE" "$NAME" -o jsonpath='{.spec.compositionRevisionRef.name}{"\n"}'

echo "Watch for reconciliation"
kubectl get "$TYPE" "$NAME" -w
356+
----
357+
358+
== Manually discover available revisions
359+
360+
*List all CompositionRevisions for a given base, oldest to newest, with labels*
361+
[source,bash]
362+
----
363+
BASE=vshnnextcloud.vshn.appcat.vshn.io

# Stream the revision names (oldest first) into a read loop instead of
# word-splitting a command substitution.
kubectl get compositionrevisions.apiextensions.crossplane.io \
  --sort-by=.metadata.creationTimestamp -o name \
  | sed 's|.*/||' \
  | awk -v b="$BASE" 'index($0, b "-")==1 { print }' \
  | while IFS= read -r r; do
      printf "%-70s " "$r"
      # </dev/null keeps kubectl from consuming the loop's input.
      kubectl get compositionrevision "$r" \
        -o go-template='{{ .metadata.creationTimestamp }} {{ index .metadata.labels "metadata.appcat.vshn.io/revision" }}{{"\n"}}' \
        </dev/null
    done
374+
----
375+
376+
== Operational patterns
377+
378+
* **Prefer autodetect:** Let the script find the previous revision relative to the current one. Works even after multiple prior rollbacks.
379+
* **Start small:** Roll back a single instance or all instances of a single type first to confirm the fix.
380+
* **Bulk rollback:** Use only for truly widespread incidents affecting many types at once.
381+
* **Pin explicitly:** When you already know the exact good revision label to apply across instances.
382+
* **Unpin after resolution:** Once the incident is fixed, run unpin.sh so healthy revisions roll out automatically again.
383+

docs/modules/ROOT/partials/nav.adoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@
112112
**** xref:framework/quality-requirements/usability/provisioning-time.adoc[]
113113
**** xref:framework/quality-requirements/usability/logs.adoc[]
114114
** Runbooks
115+
*** xref:framework/runbooks/AppCatRollback.adoc[]
115116
*** xref:framework/runbooks/AppCatBackupJobError.adoc[]
116117
*** xref:framework/runbooks/GuaranteedUptimeTarget.adoc[]
117118
*** xref:framework/runbooks/AppCatHighAvailableStatefulsetWarning.adoc[]

0 commit comments

Comments
 (0)