All checks were successful
continuous-integration/drone/push Build is passing
The four CCCI state files (deploys countfile, opstate, deps, depskip) were keyed by app domain in shared /tmp. A second run of the same domain executes its main() preamble + deploy_app's pre-lock _record_deploy BEFORE blocking at the app lock, so it reset/polluted the live first run's counter (false DG4.1 deploy-count=2, build 279) and the first run's end-of-run os.remove crashed the second (FileNotFoundError, build 281). Masked pre-restructure by the end-to-end recipe flock. Now keyed by run id + harness pid via _run_state_path(); children receive exact paths via the CCCI_*_FILE env vars, so domain keying was never load-bearing. tests/concurrency/test_run_state.py: path-invariant cases + a real-process regression (helpers.py deploy-count-run) reproducing the live interleaving — verified to FAIL under simulated shared keying. docs/concurrency.md §3 updated.
150 lines
5.3 KiB
Python
150 lines
5.3 KiB
Python
#!/usr/bin/env python3
|
|
"""Subprocess helpers for tests/concurrency — REAL kernel locks and the REAL lifetime guards in
|
|
separate processes (flock/prctl are never mocked; tests assert on actual kernel behavior).
|
|
|
|
Invoked as: python3 helpers.py <command> <args...>
|
|
|
|
Env contract (set by the spawning test):
|
|
CCCI_APP_LOCK_DIR sandbox lock dir (never /run/lock in tests)
|
|
CCCI_HELPER_OUT marker file this helper APPENDS progress lines to (ACQUIRED/READY/...)
|
|
|
|
Commands:
|
|
hold <domain> acquire the app lock, mark `ACQUIRED <ts>`, sleep forever
|
|
hold-with-child <domain> acquire the lock, spawn a plain sleeping subprocess child, mark
|
|
`ACQUIRED <ts>` + `CHILD <pid>` (PEP 446: the child must NOT
|
|
inherit the lock fd), sleep forever
|
|
guarded <domain> <deadline> install the REAL lifetime guards (alarm=<deadline>s), acquire the
|
|
lock, mark `READY`; when the teardown funnel runs (`finally:`),
|
|
mark `TEARDOWN` before exiting
|
|
wrapper <domain> spawn `guarded <domain> 3600` as MY child, mark `WRAPPED <pid>`,
|
|
sleep — the test kills me to prove PDEATHSIG TERMs the child
|
|
orphan-probe wait (bounded) until reparented (ppid==1), then install the
|
|
guards; mark `REFUSED` if they exit (expected) or `GUARDS_OK`
|
|
fetch-checkout <recipe> <ref> run run_recipe_ci.fetch_recipe (the test sets CCCI_SKIP_FETCH=1
|
|
+ a per-"run" ABRA_DIR), git-checkout <ref>, mark
|
|
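`RESULT <head> <data.txt content>`
deploy-count-run <domain> <gate> init this run's deploy countfile to 0, call _record_deploy BEFORE the lock, mark `PRELOCK`, acquire the lock, mark `ACQUIRED`, wait (up to 15s) for the <gate> file ('' = no wait), then read + remove the countfile and mark `COUNT <n>` (or `COUNT_FILE_MISSING` if it vanished)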
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
|
|
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "runner"))
|
|
from harness import abra, lifecycle, lifetime # noqa: E402
|
|
|
|
# Marker-file path handed over by the spawning test (see "Env contract" in the module
# docstring). None when the variable is unset, in which case mark() only prints.
OUT = os.getenv("CCCI_HELPER_OUT")
|
|
|
|
|
|
def mark(line: str) -> None:
    """Report one progress line to the spawning test.

    When the module-level ``OUT`` path is set, *line* plus a newline is appended to that
    marker file and flushed. The line is always echoed to stdout (flushed) as well.
    """
    if OUT:
        # Append mode: the file accumulates progress lines across calls.
        with open(OUT, "a") as marker_file:
            marker_file.write(f"{line}\n")
            marker_file.flush()
    print(line, flush=True)
|
|
|
|
|
|
def cmd_hold(domain: str) -> None:
    """`hold <domain>`: acquire the app lock, mark `ACQUIRED <ts>`, then sleep.

    *domain* is passed straight to ``lifecycle.acquire_app_lock``. The timestamp in the
    marker is ``time.time()`` taken right after the acquire call returns.
    """
    lifecycle.acquire_app_lock(domain)
    acquired_at = time.time()
    mark(f"ACQUIRED {acquired_at}")
    # Park for an hour after acquiring; presumably the spawning test ends us well before.
    time.sleep(3600)
|
|
|
|
|
|
def cmd_hold_with_child(domain: str) -> None:
    """`hold-with-child <domain>`: acquire the lock, spawn a sleeping child, report both.

    Marks `ACQUIRED <ts>` followed by `CHILD <pid>`, then sleeps. Per the module docstring,
    the test uses the child to check that it did NOT inherit the lock fd (PEP 446).
    """
    lifecycle.acquire_app_lock(domain)
    # A plain interpreter that just sleeps; it is started only after the lock is taken.
    sleeper_argv = [sys.executable, "-c", "import time; time.sleep(3600)"]
    sleeper = subprocess.Popen(sleeper_argv)
    acquired_at = time.time()
    mark(f"ACQUIRED {acquired_at}")
    mark(f"CHILD {sleeper.pid}")
    time.sleep(3600)
|
|
|
|
|
|
def cmd_guarded(domain: str, deadline: str) -> None:
    """`guarded <domain> <deadline>`: install the lifetime guards, lock, mark `READY`.

    *deadline* arrives as a command-line string and is converted with ``int`` before being
    passed as ``deadline_seconds``. After `READY` the process sleeps; whenever that sleep is
    left — normally or through an exception — `TEARDOWN` is marked on the way out.
    """
    deadline_seconds = int(deadline)
    lifetime.install_lifetime_guards(deadline_seconds=deadline_seconds)
    lifecycle.acquire_app_lock(domain)
    mark("READY")
    try:
        time.sleep(3600)
    finally:
        # The marker the test looks for to prove the teardown path actually ran.
        mark("TEARDOWN")
|
|
|
|
|
|
def cmd_wrapper(domain: str) -> None:
    """`wrapper <domain>`: run `guarded <domain> 3600` as a child, mark `WRAPPED <pid>`.

    The child is this same script, started with a copy of the current environment. The
    wrapper then sleeps; per the module docstring the test kills it to prove PDEATHSIG
    terminates the child.
    """
    this_script = os.path.abspath(__file__)
    guarded_argv = [sys.executable, this_script, "guarded", domain, "3600"]
    guarded_child = subprocess.Popen(guarded_argv, env=os.environ.copy())  # noqa: S603
    mark(f"WRAPPED {guarded_child.pid}")
    time.sleep(3600)
|
|
|
|
|
|
def cmd_orphan_probe() -> None:
    """`orphan-probe`: wait until reparented to pid 1, then try to install the guards.

    Marks `NEVER_REPARENTED` and returns if the parent pid never becomes 1 within the poll
    budget (200 polls, 0.05 s apart). Otherwise marks `REFUSED` and re-raises when
    ``install_lifetime_guards`` raises ``SystemExit``, or `GUARDS_OK` when it returns.
    """
    # Our spawner exits right after fork. Hold off until we have been reparented so the
    # guards are installed with the parent ALREADY dead — the race the ppid check closes.
    polls_done = 0
    while os.getppid() != 1:
        time.sleep(0.05)
        polls_done += 1
        if polls_done == 200:
            # e.g. a subreaper environment — the test will fail visibly on this marker.
            mark("NEVER_REPARENTED")
            return
    try:
        lifetime.install_lifetime_guards()
    except SystemExit:
        mark("REFUSED")
        raise
    else:
        mark("GUARDS_OK")
|
|
|
|
|
|
def cmd_fetch_checkout(recipe: str, ref: str) -> None:
    """`fetch-checkout <recipe> <ref>`: fetch the recipe, check out *ref*, report the result.

    Calls ``run_recipe_ci.fetch_recipe(recipe, None, None)``, checks out *ref* via ``abra``,
    then marks `RESULT <head> <content>`, where ``<content>`` is the stripped text of
    ``data.txt`` in the recipe directory.
    """
    # Imported lazily, as in the original: only the commands that need it load the module.
    import run_recipe_ci

    run_recipe_ci.fetch_recipe(recipe, None, None)
    abra.recipe_checkout(recipe, ref)
    head_commit = abra.recipe_head_commit(recipe)
    data_path = os.path.join(abra.recipe_dir(recipe), "data.txt")
    with open(data_path) as data_file:
        payload = data_file.read().strip()
    mark(f"RESULT {head_commit} {payload}")
|
|
|
|
|
|
def cmd_deploy_count_run(domain: str, gate: str) -> None:
    """Mirror the REAL run flow for the DG4.1 counter (CONC-A1 regression).

    Steps, in order: initialise this run's countfile to "0" (the main() preamble), export
    its path as CCCI_DEPLOY_COUNT_FILE, call ``_record_deploy`` BEFORE taking the app lock
    (as deploy_app does), mark `PRELOCK`, acquire the lock, mark `ACQUIRED`, wait up to 15 s
    for the `gate` file to exist ('' = no wait), then read + remove the own countfile.

    Marks `COUNT <n>` on success or `COUNT_FILE_MISSING` if the file is gone. Two of these on
    the SAME domain must each see COUNT 1 and never lose their file.
    """
    import run_recipe_ci

    countfile = run_recipe_ci._run_state_path("deploys")
    with open(countfile, "w") as counter:
        counter.write("0")
    os.environ["CCCI_DEPLOY_COUNT_FILE"] = countfile

    lifecycle._record_deploy()  # pre-lock, exactly like lifecycle.deploy_app()
    mark("PRELOCK")
    lifecycle.acquire_app_lock(domain)
    mark("ACQUIRED")

    if gate:
        # Bounded wait: proceed once the gate file appears or after 15 s, whichever is first.
        give_up_at = time.time() + 15
        while True:
            if os.path.exists(gate) or time.time() >= give_up_at:
                break
            time.sleep(0.05)

    try:
        with open(countfile) as counter:
            raw_count = counter.read().strip()
        # An empty countfile is treated as zero deploys.
        n = int(raw_count or "0")
        os.remove(countfile)
        mark(f"COUNT {n}")
    except FileNotFoundError:
        mark("COUNT_FILE_MISSING")
|
|
|
|
|
|
def main(argv: list[str]) -> None:
    """Dispatch ``argv`` (``<command> <args...>``) to the matching ``cmd_*`` helper.

    Args:
        argv: command-line arguments without the program name; the first item selects the
            command, the rest are passed positionally to its handler.

    Raises:
        SystemExit: with a message on stderr (exit status 1) when no command is given or
            the command is unknown, instead of a bare ValueError/KeyError traceback.
    """
    if not argv:
        sys.exit("usage: helpers.py <command> <args...>")

    # Built inside the function so the table is only resolved when a command is dispatched.
    commands = {
        "hold": cmd_hold,
        "hold-with-child": cmd_hold_with_child,
        "guarded": cmd_guarded,
        "wrapper": cmd_wrapper,
        "orphan-probe": cmd_orphan_probe,
        "fetch-checkout": cmd_fetch_checkout,
        "deploy-count-run": cmd_deploy_count_run,
    }
    cmd, *args = argv
    handler = commands.get(cmd)
    if handler is None:
        known = ", ".join(sorted(commands))
        sys.exit(f"helpers.py: unknown command {cmd!r} (expected one of: {known})")
    handler(*args)


if __name__ == "__main__":
    main(sys.argv[1:])
|