From 93714a593b9e182d3483de570d657059faa444ed Mon Sep 17 00:00:00 2001 From: decentral1se Date: Tue, 15 Jun 2021 23:52:57 +0200 Subject: [PATCH] `ensure_stack_deployed` is now somewhat more reliable Closes https://git.autonomic.zone/coop-cloud/abra/issues/165. --- abra | 138 ++++++++++++++++++----------------- makefile | 5 ++ tests/apps/works/compose.yml | 84 +++++++++++++++++++++ tests/default/works.env | 1 + 4 files changed, 161 insertions(+), 67 deletions(-) create mode 100644 tests/apps/works/compose.yml create mode 100644 tests/default/works.env diff --git a/abra b/abra index ce50184..a94fad7 100755 --- a/abra +++ b/abra @@ -169,13 +169,13 @@ printf -- "cat <<'EOM' >&2\n%s\nEOM\n" "$1"; }; error() { [[ -n $1 ]] && stderr "$1"; stderr "$usage"; _return 1; }; _return() { printf -- "exit %d\n" "$1"; exit "$1"; }; set -e; trimmed_doc=${DOC:1:2451} usage=${DOC:40:1842}; digest=c7bae -shorts=(-e -b -s -C -U -h -d -v -n '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '') -longs=(--env --branch --stack --skip-check --skip-update --help --debug --verbose --no-prompt --status --server --type --domain --app-name --pass --secrets --all --update --force --fast --chaos --volumes --no-tty --user --bump --dev) -argcounts=(1 1 1 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0 1 0 0); node_0(){ -value __env 0; }; node_1(){ value __branch 1; }; node_2(){ value __stack 2; } -node_3(){ switch __skip_check 3; }; node_4(){ switch __skip_update 4; } -node_5(){ switch __help 5; }; node_6(){ switch __debug 6; }; node_7(){ -switch __verbose 7; }; node_8(){ switch __no_prompt 8; }; node_9(){ +shorts=(-C -n -U -e -b -d -h -s -v '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '') +longs=(--skip-check --no-prompt --skip-update --env --branch --debug --help --stack --verbose --status --server --type --domain --app-name --pass --secrets --all --update --force --fast --chaos --volumes --no-tty --user --bump --dev) +argcounts=(0 0 0 1 1 0 0 1 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0 1 0 0); node_0(){ +switch 
__skip_check 0; }; node_1(){ switch __no_prompt 1; }; node_2(){ +switch __skip_update 2; }; node_3(){ value __env 3; }; node_4(){ +value __branch 4; }; node_5(){ switch __debug 5; }; node_6(){ switch __help 6; } +node_7(){ value __stack 7; }; node_8(){ switch __verbose 8; }; node_9(){ switch __status 9; }; node_10(){ value __server 10; }; node_11(){ value __type 11; }; node_12(){ value __domain 12; }; node_13(){ value __app_name 13; }; node_14(){ switch __pass 14; }; node_15(){ @@ -245,24 +245,24 @@ required 80; }; node_157(){ either 86 91 94 100 101 102 103 104 106 107 108 112 114 118 119 124 125 128 129 130 133 135 136 137 139 140 143 144 145 146 147 148 150 151 152 155 156 }; node_158(){ required 157; }; cat <<<' docopt_exit() { [[ -n $1 ]] && printf "%s\n" "$1" >&2; printf "%s\n" "${DOC:40:1842}" >&2 -exit 1; }'; unset var___env var___branch var___stack var___skip_check \ -var___skip_update var___help var___debug var___verbose var___no_prompt \ -var___status var___server var___type var___domain var___app_name var___pass \ -var___secrets var___all var___update var___force var___fast var___chaos \ -var___volumes var___no_tty var___user var___bump var___dev var__type_ \ -var__app_ var__service_ var__version_ var__src_ var__dst_ var__backup_file_ \ -var__args_ var__secret_ var__cmd_ var__data_ var__volume_ var__command_ \ -var__recipe_ var__host_ var__user_ var__port_ var__provider_ var__subcommands_ \ -var_app var_list var_ls var_new var_backup var_deploy var_check var_version \ -var_config var_cp var_logs var_ps var_restore var_rm var_delete var_run \ -var_rollback var_secret var_generate var_insert var_undeploy var_volume \ -var_recipe var_create var_release var_versions var_server var_add var___ \ -var_init var_apps var_upgrade var_doctor var_help; parse 158 "$@" -local prefix=${DOCOPT_PREFIX:-''}; unset "${prefix}__env" "${prefix}__branch" \ -"${prefix}__stack" "${prefix}__skip_check" "${prefix}__skip_update" \ -"${prefix}__help" "${prefix}__debug" 
"${prefix}__verbose" \ -"${prefix}__no_prompt" "${prefix}__status" "${prefix}__server" \ -"${prefix}__type" "${prefix}__domain" "${prefix}__app_name" "${prefix}__pass" \ +exit 1; }'; unset var___skip_check var___no_prompt var___skip_update var___env \ +var___branch var___debug var___help var___stack var___verbose var___status \ +var___server var___type var___domain var___app_name var___pass var___secrets \ +var___all var___update var___force var___fast var___chaos var___volumes \ +var___no_tty var___user var___bump var___dev var__type_ var__app_ \ +var__service_ var__version_ var__src_ var__dst_ var__backup_file_ var__args_ \ +var__secret_ var__cmd_ var__data_ var__volume_ var__command_ var__recipe_ \ +var__host_ var__user_ var__port_ var__provider_ var__subcommands_ var_app \ +var_list var_ls var_new var_backup var_deploy var_check var_version var_config \ +var_cp var_logs var_ps var_restore var_rm var_delete var_run var_rollback \ +var_secret var_generate var_insert var_undeploy var_volume var_recipe \ +var_create var_release var_versions var_server var_add var___ var_init \ +var_apps var_upgrade var_doctor var_help; parse 158 "$@" +local prefix=${DOCOPT_PREFIX:-''}; unset "${prefix}__skip_check" \ +"${prefix}__no_prompt" "${prefix}__skip_update" "${prefix}__env" \ +"${prefix}__branch" "${prefix}__debug" "${prefix}__help" "${prefix}__stack" \ +"${prefix}__verbose" "${prefix}__status" "${prefix}__server" "${prefix}__type" \ +"${prefix}__domain" "${prefix}__app_name" "${prefix}__pass" \ "${prefix}__secrets" "${prefix}__all" "${prefix}__update" "${prefix}__force" \ "${prefix}__fast" "${prefix}__chaos" "${prefix}__volumes" "${prefix}__no_tty" \ "${prefix}__user" "${prefix}__bump" "${prefix}__dev" "${prefix}_type_" \ @@ -279,15 +279,15 @@ local prefix=${DOCOPT_PREFIX:-''}; unset "${prefix}__env" "${prefix}__branch" \ "${prefix}recipe" "${prefix}create" "${prefix}release" "${prefix}versions" \ "${prefix}server" "${prefix}add" "${prefix}__" "${prefix}init" 
"${prefix}apps" \ "${prefix}upgrade" "${prefix}doctor" "${prefix}help" +eval "${prefix}"'__skip_check=${var___skip_check:-false}' +eval "${prefix}"'__no_prompt=${var___no_prompt:-false}' +eval "${prefix}"'__skip_update=${var___skip_update:-false}' eval "${prefix}"'__env=${var___env:-}' eval "${prefix}"'__branch=${var___branch:-}' -eval "${prefix}"'__stack=${var___stack:-}' -eval "${prefix}"'__skip_check=${var___skip_check:-false}' -eval "${prefix}"'__skip_update=${var___skip_update:-false}' -eval "${prefix}"'__help=${var___help:-false}' eval "${prefix}"'__debug=${var___debug:-false}' +eval "${prefix}"'__help=${var___help:-false}' +eval "${prefix}"'__stack=${var___stack:-}' eval "${prefix}"'__verbose=${var___verbose:-false}' -eval "${prefix}"'__no_prompt=${var___no_prompt:-false}' eval "${prefix}"'__status=${var___status:-false}' eval "${prefix}"'__server=${var___server:-}' eval "${prefix}"'__type=${var___type:-}' @@ -355,9 +355,9 @@ eval "${prefix}"'upgrade=${var_upgrade:-false}' eval "${prefix}"'doctor=${var_doctor:-false}' eval "${prefix}"'help=${var_help:-false}'; local docopt_i=1 [[ $BASH_VERSION =~ ^4.3 ]] && docopt_i=2; for ((;docopt_i>0;docopt_i--)); do -declare -p "${prefix}__env" "${prefix}__branch" "${prefix}__stack" \ -"${prefix}__skip_check" "${prefix}__skip_update" "${prefix}__help" \ -"${prefix}__debug" "${prefix}__verbose" "${prefix}__no_prompt" \ +declare -p "${prefix}__skip_check" "${prefix}__no_prompt" \ +"${prefix}__skip_update" "${prefix}__env" "${prefix}__branch" \ +"${prefix}__debug" "${prefix}__help" "${prefix}__stack" "${prefix}__verbose" \ "${prefix}__status" "${prefix}__server" "${prefix}__type" "${prefix}__domain" \ "${prefix}__app_name" "${prefix}__pass" "${prefix}__secrets" "${prefix}__all" \ "${prefix}__update" "${prefix}__force" "${prefix}__fast" "${prefix}__chaos" \ @@ -792,50 +792,54 @@ output_version_summary() { fi } -# Note(decentral1se): inspired by https://github.com/vitalets/docker-stack-wait-deploy ensure_stack_deployed() { - 
STACK_NAME=$1 + local -a HEALTHY # names of services whose healthcheck reported "healthy" + local -a MISSING # names of services that report no healthcheck status - warning "Polling deploy state to check for success" + TIMEOUT=60 # error is called once idx reaches this many polling rounds (1s sleep per round) + idx=0 # polling rounds completed so far - while true; do - all_services_done=1 - has_errors=0 + IFS=' ' read -r -a SERVICES <<< "$(docker stack services "${STACK_NAME}" --format "{{.ID}}" | tr '\n' ' ')" - service_ids=$(docker stack services -q "$STACK_NAME") + while [ ! $(( ${#HEALTHY[@]} + ${#MISSING[@]} )) -eq ${#SERVICES[@]} ]; do + for service in $(docker ps -f "name=$STACK_NAME" -q); do + healthcheck=$(docker inspect --format "{{ json .State }}" "$service" | $JQ "try(.Health.Status // \"missing\")") + name=$(docker inspect --format '{{ index .Config.Labels "com.docker.swarm.service.name" }}' "$service") - for service_id in $service_ids; do - # see: https://github.com/moby/moby/issues/28012 - service_state=$(docker service inspect --format "{{if .UpdateStatus}}{{.UpdateStatus.State}}{{else}}created{{end}}" "$service_id") + if [[ " ${MISSING[*]} " == *" ${name} "* ]] || [[ " ${HEALTHY[*]} " == *" ${name} "* ]]; then # already classified; whole-name match so "app" is not confused with "app_db" + continue + fi - debug "$service_id has state: $service_state" + if [[ "$healthcheck" == "\"missing\"" ]]; then + MISSING+=("$name") + info "$name has no healthcheck configured, cannot guarantee this service comes up successfully..." + continue + fi - case "$service_state" in - created|completed) - ;; - paused|rollback_completed) - has_errors=1 - ;; - *) - all_services_done=0 - ;; - esac + if [[ "$healthcheck" == "\"healthy\"" ]]; then + HEALTHY+=("$name") + info "$name is healthy!"
+ continue + fi + + if [[ "$healthcheck" == "\"unhealthy\"" ]]; then + logs=$(docker inspect --format "{{ json .State.Health.Log }}" "$service") + exitcode="$(echo "$logs" | $JQ '.[-1] | .ExitCode')" + warning "Healthcheck for new instance of $name is failing (exit code: $exitcode)" + warning "$(echo "$logs" | $JQ -r '.[-1] | .Output')" + error "healthcheck for $name is failing, this deployment did not succeed :(" + fi done - if [ "$all_services_done" == "1" ]; then - if [ "$has_errors" == "1" ]; then - warning "Deployment appears to have failed" - warning "Run \"abra app ${STACK_NAME} logs \" to see app logs" - warning "Run \"abra app ${STACK_NAME} ps \" to see app status" - break - else - warning "Deployment appears to have suceeded" - break - fi - else - sleep 1 + idx=$((idx + 1)) + if [[ $idx -eq "$TIMEOUT" ]]; then + error "Waiting for healthy status timed out, this deployment did not succeed :(" fi + + sleep 1 + info "Deploying: $(( ${#HEALTHY[@]} + ${#MISSING[@]} ))/${#SERVICES[@]} (timeout: $idx/$TIMEOUT)" done + success "All services up! Deployment succeeded!"
} ensure_domain_deployed() { @@ -870,7 +874,7 @@ load_instance() { APP="$abra__app_" # load all files matching "$APP.env" into ENV_FILES array - mapfile -t ENV_FILES < <(find -L "$ABRA_DIR" -name "$APP.env") + mapfile -t ENV_FILES < <(find -L "$ABRA_DIR/servers/" -name "$APP.env") # FIXME 3wc: requires bash 4, use for loop instead case "${#ENV_FILES[@]}" in diff --git a/makefile b/makefile index 7fd3edf..fc557f9 100644 --- a/makefile +++ b/makefile @@ -43,3 +43,8 @@ build: push: build @docker push thecoopcloud/abra + +symlink: + @mkdir -p ~/.abra/servers/ ~/.abra/apps/ && \ + ln -srf tests/default ~/.abra/servers && \ + ln -srf tests/apps/* ~/.abra/apps diff --git a/tests/apps/works/compose.yml b/tests/apps/works/compose.yml new file mode 100644 index 0000000..5dba367 --- /dev/null +++ b/tests/apps/works/compose.yml @@ -0,0 +1,84 @@ +--- + +# The goal of this compose file is to have a testing ground for understanding +# what cases we need to handle to get stable deployments. For that, we need to +# work with healthchecks and deploy configurations quite closely. If you run +# the `make symlink` target then this will be loaded into a "fake" app on your +# local machine which you can deploy with `abra`.
+ +version: "3.8" +services: + r1_should_work: + image: redis:alpine + deploy: + update_config: + failure_action: rollback + order: start-first + rollback_config: + order: start-first + restart_policy: + max_attempts: 1 + healthcheck: + test: redis-cli ping + interval: 2s + retries: 3 + start_period: 1s + timeout: 3s + + r2_broken_health_check: + image: redis:alpine + deploy: + update_config: + failure_action: rollback + order: start-first + rollback_config: + order: start-first + restart_policy: + max_attempts: 3 + healthcheck: + test: foobar + interval: 2s + retries: 3 + start_period: 1s + timeout: 3s + + r3_no_health_check: + image: redis:alpine + deploy: + update_config: + failure_action: rollback + order: start-first + rollback_config: + order: start-first + restart_policy: + max_attempts: 3 + + r4_disabled_health_check: + image: redis:alpine + deploy: + update_config: + failure_action: rollback + order: start-first + rollback_config: + order: start-first + restart_policy: + max_attempts: 3 + healthcheck: + disable: true + + r5_should_also_work: + image: redis:alpine + deploy: + update_config: + failure_action: rollback + order: start-first + rollback_config: + order: start-first + restart_policy: + max_attempts: 1 + healthcheck: + test: redis-cli ping + interval: 2s + retries: 3 + start_period: 1s + timeout: 3s diff --git a/tests/default/works.env b/tests/default/works.env new file mode 100644 index 0000000..aa318fd --- /dev/null +++ b/tests/default/works.env @@ -0,0 +1 @@ +TYPE=works