`ensure_stack_deployed` is now somewhat more reliable

Closes coop-cloud/abra#165.
This commit is contained in:
decentral1se 2021-06-15 23:52:57 +02:00 committed by decentral1se
parent 57f3f96bbc
commit 93714a593b
No known key found for this signature in database
GPG Key ID: 5E2EF5A63E3718CC
4 changed files with 161 additions and 67 deletions

138
abra
View File

@ -169,13 +169,13 @@ printf -- "cat <<'EOM' >&2\n%s\nEOM\n" "$1"; }; error() {
[[ -n $1 ]] && stderr "$1"; stderr "$usage"; _return 1; }; _return() { [[ -n $1 ]] && stderr "$1"; stderr "$usage"; _return 1; }; _return() {
printf -- "exit %d\n" "$1"; exit "$1"; }; set -e; trimmed_doc=${DOC:1:2451} printf -- "exit %d\n" "$1"; exit "$1"; }; set -e; trimmed_doc=${DOC:1:2451}
usage=${DOC:40:1842}; digest=c7bae usage=${DOC:40:1842}; digest=c7bae
shorts=(-e -b -s -C -U -h -d -v -n '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '') shorts=(-C -n -U -e -b -d -h -s -v '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '')
longs=(--env --branch --stack --skip-check --skip-update --help --debug --verbose --no-prompt --status --server --type --domain --app-name --pass --secrets --all --update --force --fast --chaos --volumes --no-tty --user --bump --dev) longs=(--skip-check --no-prompt --skip-update --env --branch --debug --help --stack --verbose --status --server --type --domain --app-name --pass --secrets --all --update --force --fast --chaos --volumes --no-tty --user --bump --dev)
argcounts=(1 1 1 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0 1 0 0); node_0(){ argcounts=(0 0 0 1 1 0 0 1 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0 1 0 0); node_0(){
value __env 0; }; node_1(){ value __branch 1; }; node_2(){ value __stack 2; } switch __skip_check 0; }; node_1(){ switch __no_prompt 1; }; node_2(){
node_3(){ switch __skip_check 3; }; node_4(){ switch __skip_update 4; } switch __skip_update 2; }; node_3(){ value __env 3; }; node_4(){
node_5(){ switch __help 5; }; node_6(){ switch __debug 6; }; node_7(){ value __branch 4; }; node_5(){ switch __debug 5; }; node_6(){ switch __help 6; }
switch __verbose 7; }; node_8(){ switch __no_prompt 8; }; node_9(){ node_7(){ value __stack 7; }; node_8(){ switch __verbose 8; }; node_9(){
switch __status 9; }; node_10(){ value __server 10; }; node_11(){ switch __status 9; }; node_10(){ value __server 10; }; node_11(){
value __type 11; }; node_12(){ value __domain 12; }; node_13(){ value __type 11; }; node_12(){ value __domain 12; }; node_13(){
value __app_name 13; }; node_14(){ switch __pass 14; }; node_15(){ value __app_name 13; }; node_14(){ switch __pass 14; }; node_15(){
@ -245,24 +245,24 @@ required 80; }; node_157(){
either 86 91 94 100 101 102 103 104 106 107 108 112 114 118 119 124 125 128 129 130 133 135 136 137 139 140 143 144 145 146 147 148 150 151 152 155 156 either 86 91 94 100 101 102 103 104 106 107 108 112 114 118 119 124 125 128 129 130 133 135 136 137 139 140 143 144 145 146 147 148 150 151 152 155 156
}; node_158(){ required 157; }; cat <<<' docopt_exit() { }; node_158(){ required 157; }; cat <<<' docopt_exit() {
[[ -n $1 ]] && printf "%s\n" "$1" >&2; printf "%s\n" "${DOC:40:1842}" >&2 [[ -n $1 ]] && printf "%s\n" "$1" >&2; printf "%s\n" "${DOC:40:1842}" >&2
exit 1; }'; unset var___env var___branch var___stack var___skip_check \ exit 1; }'; unset var___skip_check var___no_prompt var___skip_update var___env \
var___skip_update var___help var___debug var___verbose var___no_prompt \ var___branch var___debug var___help var___stack var___verbose var___status \
var___status var___server var___type var___domain var___app_name var___pass \ var___server var___type var___domain var___app_name var___pass var___secrets \
var___secrets var___all var___update var___force var___fast var___chaos \ var___all var___update var___force var___fast var___chaos var___volumes \
var___volumes var___no_tty var___user var___bump var___dev var__type_ \ var___no_tty var___user var___bump var___dev var__type_ var__app_ \
var__app_ var__service_ var__version_ var__src_ var__dst_ var__backup_file_ \ var__service_ var__version_ var__src_ var__dst_ var__backup_file_ var__args_ \
var__args_ var__secret_ var__cmd_ var__data_ var__volume_ var__command_ \ var__secret_ var__cmd_ var__data_ var__volume_ var__command_ var__recipe_ \
var__recipe_ var__host_ var__user_ var__port_ var__provider_ var__subcommands_ \ var__host_ var__user_ var__port_ var__provider_ var__subcommands_ var_app \
var_app var_list var_ls var_new var_backup var_deploy var_check var_version \ var_list var_ls var_new var_backup var_deploy var_check var_version var_config \
var_config var_cp var_logs var_ps var_restore var_rm var_delete var_run \ var_cp var_logs var_ps var_restore var_rm var_delete var_run var_rollback \
var_rollback var_secret var_generate var_insert var_undeploy var_volume \ var_secret var_generate var_insert var_undeploy var_volume var_recipe \
var_recipe var_create var_release var_versions var_server var_add var___ \ var_create var_release var_versions var_server var_add var___ var_init \
var_init var_apps var_upgrade var_doctor var_help; parse 158 "$@" var_apps var_upgrade var_doctor var_help; parse 158 "$@"
local prefix=${DOCOPT_PREFIX:-''}; unset "${prefix}__env" "${prefix}__branch" \ local prefix=${DOCOPT_PREFIX:-''}; unset "${prefix}__skip_check" \
"${prefix}__stack" "${prefix}__skip_check" "${prefix}__skip_update" \ "${prefix}__no_prompt" "${prefix}__skip_update" "${prefix}__env" \
"${prefix}__help" "${prefix}__debug" "${prefix}__verbose" \ "${prefix}__branch" "${prefix}__debug" "${prefix}__help" "${prefix}__stack" \
"${prefix}__no_prompt" "${prefix}__status" "${prefix}__server" \ "${prefix}__verbose" "${prefix}__status" "${prefix}__server" "${prefix}__type" \
"${prefix}__type" "${prefix}__domain" "${prefix}__app_name" "${prefix}__pass" \ "${prefix}__domain" "${prefix}__app_name" "${prefix}__pass" \
"${prefix}__secrets" "${prefix}__all" "${prefix}__update" "${prefix}__force" \ "${prefix}__secrets" "${prefix}__all" "${prefix}__update" "${prefix}__force" \
"${prefix}__fast" "${prefix}__chaos" "${prefix}__volumes" "${prefix}__no_tty" \ "${prefix}__fast" "${prefix}__chaos" "${prefix}__volumes" "${prefix}__no_tty" \
"${prefix}__user" "${prefix}__bump" "${prefix}__dev" "${prefix}_type_" \ "${prefix}__user" "${prefix}__bump" "${prefix}__dev" "${prefix}_type_" \
@ -279,15 +279,15 @@ local prefix=${DOCOPT_PREFIX:-''}; unset "${prefix}__env" "${prefix}__branch" \
"${prefix}recipe" "${prefix}create" "${prefix}release" "${prefix}versions" \ "${prefix}recipe" "${prefix}create" "${prefix}release" "${prefix}versions" \
"${prefix}server" "${prefix}add" "${prefix}__" "${prefix}init" "${prefix}apps" \ "${prefix}server" "${prefix}add" "${prefix}__" "${prefix}init" "${prefix}apps" \
"${prefix}upgrade" "${prefix}doctor" "${prefix}help" "${prefix}upgrade" "${prefix}doctor" "${prefix}help"
eval "${prefix}"'__skip_check=${var___skip_check:-false}'
eval "${prefix}"'__no_prompt=${var___no_prompt:-false}'
eval "${prefix}"'__skip_update=${var___skip_update:-false}'
eval "${prefix}"'__env=${var___env:-}' eval "${prefix}"'__env=${var___env:-}'
eval "${prefix}"'__branch=${var___branch:-}' eval "${prefix}"'__branch=${var___branch:-}'
eval "${prefix}"'__stack=${var___stack:-}'
eval "${prefix}"'__skip_check=${var___skip_check:-false}'
eval "${prefix}"'__skip_update=${var___skip_update:-false}'
eval "${prefix}"'__help=${var___help:-false}'
eval "${prefix}"'__debug=${var___debug:-false}' eval "${prefix}"'__debug=${var___debug:-false}'
eval "${prefix}"'__help=${var___help:-false}'
eval "${prefix}"'__stack=${var___stack:-}'
eval "${prefix}"'__verbose=${var___verbose:-false}' eval "${prefix}"'__verbose=${var___verbose:-false}'
eval "${prefix}"'__no_prompt=${var___no_prompt:-false}'
eval "${prefix}"'__status=${var___status:-false}' eval "${prefix}"'__status=${var___status:-false}'
eval "${prefix}"'__server=${var___server:-}' eval "${prefix}"'__server=${var___server:-}'
eval "${prefix}"'__type=${var___type:-}' eval "${prefix}"'__type=${var___type:-}'
@ -355,9 +355,9 @@ eval "${prefix}"'upgrade=${var_upgrade:-false}'
eval "${prefix}"'doctor=${var_doctor:-false}' eval "${prefix}"'doctor=${var_doctor:-false}'
eval "${prefix}"'help=${var_help:-false}'; local docopt_i=1 eval "${prefix}"'help=${var_help:-false}'; local docopt_i=1
[[ $BASH_VERSION =~ ^4.3 ]] && docopt_i=2; for ((;docopt_i>0;docopt_i--)); do [[ $BASH_VERSION =~ ^4.3 ]] && docopt_i=2; for ((;docopt_i>0;docopt_i--)); do
declare -p "${prefix}__env" "${prefix}__branch" "${prefix}__stack" \ declare -p "${prefix}__skip_check" "${prefix}__no_prompt" \
"${prefix}__skip_check" "${prefix}__skip_update" "${prefix}__help" \ "${prefix}__skip_update" "${prefix}__env" "${prefix}__branch" \
"${prefix}__debug" "${prefix}__verbose" "${prefix}__no_prompt" \ "${prefix}__debug" "${prefix}__help" "${prefix}__stack" "${prefix}__verbose" \
"${prefix}__status" "${prefix}__server" "${prefix}__type" "${prefix}__domain" \ "${prefix}__status" "${prefix}__server" "${prefix}__type" "${prefix}__domain" \
"${prefix}__app_name" "${prefix}__pass" "${prefix}__secrets" "${prefix}__all" \ "${prefix}__app_name" "${prefix}__pass" "${prefix}__secrets" "${prefix}__all" \
"${prefix}__update" "${prefix}__force" "${prefix}__fast" "${prefix}__chaos" \ "${prefix}__update" "${prefix}__force" "${prefix}__fast" "${prefix}__chaos" \
@ -792,50 +792,54 @@ output_version_summary() {
fi fi
} }
# Note(decentral1se): inspired by https://github.com/vitalets/docker-stack-wait-deploy
ensure_stack_deployed() { ensure_stack_deployed() {
STACK_NAME=$1 local -a HEALTHY # mapping
local -a MISSING # mapping
warning "Polling deploy state to check for success" TIMEOUT=60
idx=0
while true; do IFS=' ' read -r -a SERVICES <<< "$(docker stack services "${STACK_NAME}" --format "{{.ID}}" | tr '\n' ' ')"
all_services_done=1
has_errors=0
service_ids=$(docker stack services -q "$STACK_NAME") while [ ! $(( ${#HEALTHY[@]} + ${#MISSING[@]} )) -eq ${#SERVICES[@]} ]; do
for service in $(docker ps -f "name=$STACK_NAME" -q); do
healthcheck=$(docker inspect --format "{{ json .State }}" "$service" | jq "try(.Health.Status // \"missing\")")
name=$(docker inspect --format '{{ index .Config.Labels "com.docker.swarm.service.name" }}' "$service")
for service_id in $service_ids; do if [[ ${MISSING[*]} =~ ${name} ]] || [[ ${HEALTHY[*]} =~ ${name} ]]; then
# see: https://github.com/moby/moby/issues/28012 continue
service_state=$(docker service inspect --format "{{if .UpdateStatus}}{{.UpdateStatus.State}}{{else}}created{{end}}" "$service_id") fi
debug "$service_id has state: $service_state" if [[ "$healthcheck" == "\"missing\"" ]] && [[ ! "${MISSING[*]}" =~ $name ]]; then
MISSING+=("$name")
info "$name has no healthcheck configured, cannot guarantee this service comes up successfully..."
continue
fi
case "$service_state" in if [[ "$healthcheck" == "\"healthy\"" ]] && [[ ! "${HEALTHY[*]}" =~ $name ]]; then
created|completed) HEALTHY+=("$name")
;; info "$name is healthy!"
paused|rollback_completed) continue
has_errors=1 fi
;;
*) if [[ "$healthcheck" == \""unhealthy"\" ]]; then
all_services_done=0 logs=$(docker inspect --format "{{ json .State.Health.Log }}" "$service")
;; exitcode="$(echo "$logs" | $JQ '.[-1] | .ExitCode')"
esac warning "Healthcheck for new instance of $name is failing (exit code: $exitcode)"
warning "$(echo "$logs" | $JQ -r '.[-1] | .Output')"
error "healthcheck for $name is failing, this deployment did not succeed :("
fi
done done
if [ "$all_services_done" == "1" ]; then idx=$(("$idx" + 1))
if [ "$has_errors" == "1" ]; then if [[ $idx -eq "$TIMEOUT" ]]; then
warning "Deployment appears to have failed" error "Waiting for healthy status timed out, this deployment did not succeed :("
warning "Run \"abra app ${STACK_NAME} logs \" to see app logs"
warning "Run \"abra app ${STACK_NAME} ps \" to see app status"
break
else
warning "Deployment appears to have suceeded"
break
fi
else
sleep 1
fi fi
sleep 1
info "Deploying: $(( ${#HEALTHY[@]} + ${#MISSING[@]} ))/${#SERVICES[@]} (timeout: $idx/$TIMEOUT)"
done done
success "All services up! Deployment succeeded!"
} }
ensure_domain_deployed() { ensure_domain_deployed() {
@ -870,7 +874,7 @@ load_instance() {
APP="$abra__app_" APP="$abra__app_"
# load all files matching "$APP.env" into ENV_FILES array # load all files matching "$APP.env" into ENV_FILES array
mapfile -t ENV_FILES < <(find -L "$ABRA_DIR" -name "$APP.env") mapfile -t ENV_FILES < <(find -L "$ABRA_DIR/servers/" -name "$APP.env")
# FIXME 3wc: requires bash 4, use for loop instead # FIXME 3wc: requires bash 4, use for loop instead
case "${#ENV_FILES[@]}" in case "${#ENV_FILES[@]}" in

View File

@ -43,3 +43,8 @@ build:
push: build push: build
@docker push thecoopcloud/abra @docker push thecoopcloud/abra
# Link the test fixtures into the local abra configuration so the fake test
# app can be deployed with `abra`. Both target directories are created first:
# without an existing ~/.abra/apps directory, `ln` would either turn
# ~/.abra/apps itself into a symlink (single match for tests/apps/*) or fail
# with "target is not a directory" (several matches).
symlink:
	@mkdir -p ~/.abra/servers/ ~/.abra/apps/ && \
		ln -srf tests/default ~/.abra/servers && \
		ln -srf tests/apps/* ~/.abra/apps

View File

@ -0,0 +1,84 @@
---
# The goal of this compose file is to have a testing ground for understanding
# what cases we need to handle to get stable deployments. For that, we need to
# work with healthchecks and deploy configurations quite closely. If you run
# the `make symlink` target then this will be loaded into a "fake" app on your
# local machine which you can deploy with `abra`.
#
# Every service uses the same image and the same start-first update/rollback
# strategy; the services differ only in their healthcheck configuration and
# in the number of restart attempts.
version: "3.8"

services:
  # Healthcheck configured with `redis-cli ping`.
  r1_should_work:
    image: redis:alpine
    deploy:
      update_config:
        failure_action: rollback
        order: start-first
      rollback_config:
        order: start-first
      restart_policy:
        max_attempts: 1
    healthcheck:
      test: redis-cli ping
      interval: 2s
      retries: 3
      start_period: 1s
      timeout: 3s

  # Healthcheck command is `foobar`; per the service name this check is
  # meant to fail.
  r2_broken_health_check:
    image: redis:alpine
    deploy:
      update_config:
        failure_action: rollback
        order: start-first
      rollback_config:
        order: start-first
      restart_policy:
        max_attempts: 3
    healthcheck:
      test: foobar
      interval: 2s
      retries: 3
      start_period: 1s
      timeout: 3s

  # No `healthcheck` key at all.
  r3_no_health_check:
    image: redis:alpine
    deploy:
      update_config:
        failure_action: rollback
        order: start-first
      rollback_config:
        order: start-first
      restart_policy:
        max_attempts: 3

  # Healthcheck explicitly disabled.
  r4_disabled_health_check:
    image: redis:alpine
    deploy:
      update_config:
        failure_action: rollback
        order: start-first
      rollback_config:
        order: start-first
      restart_policy:
        max_attempts: 3
    healthcheck:
      disable: true

  # Same configuration as r1_should_work.
  r5_should_also_work:
    image: redis:alpine
    deploy:
      update_config:
        failure_action: rollback
        order: start-first
      rollback_config:
        order: start-first
      restart_policy:
        max_attempts: 1
    healthcheck:
      test: redis-cli ping
      interval: 2s
      retries: 3
      start_period: 1s
      timeout: 3s

1
tests/default/works.env Normal file
View File

@ -0,0 +1 @@
TYPE=works