fix(1d): bounded retry in _app_container (backup briefly cycles the app container)
`abra app backup create` (backup-bot-two) stops/cycles the app container, so a mutate exec_in_app call issued right after a backup hit an empty `docker ps` result and raised. _app_container now polls `docker ps` in a bounded loop (rather than relying on a single bare sleep) until the container reappears or the timeout (default 60s) elapses. Recipe-agnostic harness robustness. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -295,18 +295,25 @@ def previous_version(recipe: str) -> str | None:
|
|||||||
return vers[-2] if len(vers) >= 2 else None
|
return vers[-2] if len(vers) >= 2 else None
|
||||||
|
|
||||||
|
|
||||||
def _app_container(domain: str, service: str = "app") -> str:
|
def _app_container(domain: str, service: str = "app", timeout: int = 60) -> str:
|
||||||
"""The running container id for <stack>_<service>."""
|
"""The running container id for <stack>_<service>, with a BOUNDED POLL for it to (re)appear.
|
||||||
|
A lifecycle op can briefly leave no running task — notably `abra app backup create`, where
|
||||||
|
backup-bot-two stops/cycles the app container, so a mutate exec right after backup hit an empty
|
||||||
|
`docker ps` and raised. Poll (no bare sleep) until the container is back or timeout."""
|
||||||
name = f"{_stack_name(domain)}_{service}"
|
name = f"{_stack_name(domain)}_{service}"
|
||||||
proc = subprocess.run(
|
deadline = time.time() + timeout
|
||||||
["docker", "ps", "--filter", f"name={name}", "--format", "{{.ID}}"],
|
while True:
|
||||||
capture_output=True,
|
proc = subprocess.run(
|
||||||
text=True,
|
["docker", "ps", "--filter", f"name={name}", "--format", "{{.ID}}"],
|
||||||
)
|
capture_output=True,
|
||||||
cid = proc.stdout.strip().split("\n")[0]
|
text=True,
|
||||||
if not cid:
|
)
|
||||||
raise RuntimeError(f"no running container for {name}")
|
cid = proc.stdout.strip().split("\n")[0]
|
||||||
return cid
|
if cid:
|
||||||
|
return cid
|
||||||
|
if time.time() >= deadline:
|
||||||
|
raise RuntimeError(f"no running container for {name} after {timeout}s")
|
||||||
|
time.sleep(3)
|
||||||
|
|
||||||
|
|
||||||
def exec_in_app(domain: str, cmd: list[str], service: str = "app") -> str:
|
def exec_in_app(domain: str, cmd: list[str], service: str = "app") -> str:
|
||||||
|
|||||||
Reference in New Issue
Block a user