fix(1d): bounded retry in _app_container (backup briefly cycles the app container)

`abra app backup create` (backup-bot-two) stops/cycles the app container, so a mutate exec_in_app
call made right after a backup hit an empty `docker ps` and raised. _app_container now polls (no
bare sleep) for the container to reappear within a timeout. Recipe-agnostic harness robustness.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-28 00:06:28 +01:00
parent 81e26a1bdc
commit feb6f80d50

View File

@@ -295,18 +295,25 @@ def previous_version(recipe: str) -> str | None:
return vers[-2] if len(vers) >= 2 else None return vers[-2] if len(vers) >= 2 else None
def _app_container(domain: str, service: str = "app", timeout: int = 60) -> str:
    """Return the running container id for <stack>_<service>, polling until it (re)appears.

    A lifecycle op can briefly leave no running task — notably `abra app backup create`, where
    backup-bot-two stops/cycles the app container — so a single `docker ps` issued right after a
    backup can come back empty. Rather than failing on the first empty result, poll until the
    container is listed again or `timeout` seconds have elapsed.

    Args:
        domain: App domain; turned into the stack name by `_stack_name`.
        service: Service within the stack (default "app").
        timeout: Maximum number of seconds to wait for a running container.

    Returns:
        The first container id that `docker ps` lists for the name filter.

    Raises:
        RuntimeError: If no container is listed before the timeout expires. When the final
            `docker ps` call itself failed, its stderr is appended to the message.
    """
    name = f"{_stack_name(domain)}_{service}"
    poll_interval = 3  # seconds between `docker ps` probes
    # Monotonic clock: a wall-clock adjustment (NTP step, manual change) must not shorten or
    # extend the bounded wait.
    deadline = time.monotonic() + timeout
    while True:
        proc = subprocess.run(
            ["docker", "ps", "--filter", f"name={name}", "--format", "{{.ID}}"],
            capture_output=True,
            text=True,
        )
        # NOTE(review): docker's `name` filter matches substrings, so the first listed id may
        # belong to a sibling service whose name merely contains `name` — confirm this is fine.
        cid = proc.stdout.strip().split("\n")[0]
        if cid:
            return cid
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            message = f"no running container for {name} after {timeout}s"
            # Distinguish "docker ps worked but listed nothing" from "docker ps itself failed".
            detail = proc.stderr.strip()
            if proc.returncode != 0 and detail:
                message = f"{message}: {detail}"
            raise RuntimeError(message)
        # Never sleep past the deadline, so the last probe happens right at the timeout.
        time.sleep(min(poll_interval, remaining))
def exec_in_app(domain: str, cmd: list[str], service: str = "app") -> str: def exec_in_app(domain: str, cmd: list[str], service: str = "app") -> str: