All checks were successful
continuous-integration/drone/push Build is passing
tests/concurrency/ — NOT in the default `pytest tests/unit` gate; run explicitly with `pytest tests/concurrency -q`. flock/prctl/alarm are never mocked: helper subprocesses (helpers.py) hold real locks and install the real lifetime guards; locks live in a per-test tmp dir via CCCI_APP_LOCK_DIR; every helper (and recorded grandchild) is reaped by fixture cleanup. - test_locks.py (cases 1-4): SIGKILL auto-release; LOCK_NB held/unheld semantics; PEP 446 fd-not-inherited (holder's child survives, lock still releases); same-domain second acquire blocks until first holder exits. - test_janitor.py (cases 5-12): orphan reaped once + lockfile unlinked; live holder never reaped + logged; new-run acquire blocks until a slow reap completes (reap-under-probe-lock); two overlapping janitors -> exactly one reaps (flock arbitration); reboot sim (no lockfile) reaps immediately with no age wait; >120min-held lock flagged 'possible leaked run' and NOT stolen; warm/canonical names never probed (no lockfile even created); directory-as-lockfile and missing lock dir degrade to skip+log, never crash. - test_lifetime.py (cases 13-16): PDEATHSIG (wrapper parent SIGKILL'd -> guarded child TERM'd, teardown marker, lock released); already-orphaned helper REFUSES to run (ppid race); 2s deadline alarm -> teardown + exit 142 + lock released; SIGTERM -> teardown + exit 143 + lock released. - test_abra_dir.py (cases 17-19 + 18b): per-run dir built + $ABRA_DIR exported before the first abra call (recording stub abra on PATH); two CONCURRENT same-recipe fetch+checkout flows into different ABRA_DIRs -> divergent correct trees, canonical staged clone untouched; .env written through the servers/ symlink lands in the canonical path (env_get/env_set agree); manual runs get pid-suffixed dirs. On cc-ci: pytest tests/concurrency -q -> 20 passed; tests/unit -> 138 passed; lint PASS.
83 lines
3.6 KiB
Python
83 lines
3.6 KiB
Python
"""Lifetime hardening (concurrency-restructure plan, cases 13-16): the REAL prctl/signal/alarm
|
|
guards installed by helper subprocesses; tests assert teardown ran, exit was non-zero, and the
|
|
lock was released."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import signal
|
|
import sys
|
|
|
|
sys.path.insert(0, os.path.dirname(__file__))
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
|
from concutil import ( # noqa: E402
|
|
DOMAIN,
|
|
wait_lock_state,
|
|
wait_marker,
|
|
wait_pid_gone,
|
|
)
|
|
|
|
|
|
def test_13_pdeathsig_parent_kill_terms_harness(lock_dir, pool):
    """Case 13: a guarded harness must die with its wrapper, even on SIGKILL.

    The wrapper helper spawns a guarded harness child and reports the child's pid on a
    ``WRAPPED`` marker line. The wrapper is then killed outright, so it has no chance to
    forward a signal. The test passes only if the child still exits, its ``TEARDOWN``
    marker shows up, and the lock on ``DOMAIN`` ends up free.
    """
    wrapper, out_path = pool.spawn("wrapper", DOMAIN)

    wrapped_line = wait_marker(out_path, "WRAPPED")
    assert wrapped_line, "wrapper never spawned its child"

    # The second whitespace-separated field of the WRAPPED line is the child's pid.
    harness_pid = int(wrapped_line.split()[1])
    # Hand the grandchild to the pool (presumably so fixture cleanup can reap it).
    pool.track_pid(harness_pid)

    ready_line = wait_marker(out_path, "READY")
    assert ready_line, "guarded child never got ready"

    # Kill the parent without any courtesy signal to the child: from here on the
    # kernel's parent-death signal is the only thing that can stop the harness.
    wrapper.kill()
    wrapper.wait(timeout=10)

    child_exited = wait_pid_gone(harness_pid)
    assert child_exited, "guarded child must exit on parent death (PDEATHSIG)"

    teardown_line = wait_marker(out_path, "TEARDOWN", timeout=5)
    assert teardown_line, "teardown funnel did not run"

    final_state = wait_lock_state(DOMAIN, "free")
    assert final_state == "free"
|
|
|
|
|
|
def test_14_already_orphaned_helper_refuses_to_run(lock_dir, pool):
    """Case 14 (ppid race): a helper that starts life already orphaned must refuse to run.

    If the parent is gone before the helper could arm its parent-death signal, that
    signal would never fire. The scenario is built with a throw-away intermediate Python
    process that launches ``helpers.py orphan-probe`` and exits straight away. The test
    then waits for the probe to write a ``REFUSED`` marker.
    """
    import subprocess

    helpers_script = os.path.join(os.path.dirname(__file__), "helpers.py")
    out = os.path.join(pool.out_dir, "orphan.out")

    # One-liner for the intermediate parent: start the probe, then fall off the end so
    # the probe is left without its original parent.
    launcher_code = (
        "import subprocess, sys, os; "
        "subprocess.Popen([sys.executable, os.environ['CCCI_HELPERS'], 'orphan-probe']); "
    )

    # The intermediate inherits our environment plus the two variables the one-liner
    # and the helper read.
    child_env = {
        **os.environ,
        "CCCI_HELPER_OUT": out,
        "CCCI_HELPERS": helpers_script,
    }

    subprocess.run(
        [sys.executable, "-c", launcher_code],
        env=child_env,
        timeout=15,
        check=True,
    )

    refused_line = wait_marker(out, "REFUSED", timeout=20)
    assert refused_line, "orphaned helper did not refuse to run (or never reparented to pid 1)"
|
|
|
|
|
|
def test_15_deadline_alarm_fires_teardown_and_releases(lock_dir, pool):
    """Case 15: the self-imposed deadline (alarm) ends a guarded helper cleanly.

    A guarded helper is spawned with a deadline argument of ``"2"``. Once it reports
    ``READY`` the test simply waits for it to exit. The exit code must be non-zero and
    equal to ``128 + SIGALRM``, the ``TEARDOWN`` marker must be written, and the lock on
    ``DOMAIN`` must be free afterwards.
    """
    helper, out_path = pool.spawn("guarded", DOMAIN, "2")

    ready_line = wait_marker(out_path, "READY")
    assert ready_line

    exit_code = helper.wait(timeout=20)
    assert exit_code != 0, f"deadline exit must be non-zero (got {exit_code})"

    # Shell-style "killed by signal" encoding: 128 plus the signal number.
    expected_code = 128 + signal.SIGALRM
    assert exit_code == expected_code, f"expected 142 (128+SIGALRM), got {exit_code}"

    teardown_line = wait_marker(out_path, "TEARDOWN", timeout=5)
    assert teardown_line, "teardown funnel did not run on deadline"

    final_state = wait_lock_state(DOMAIN, "free")
    assert final_state == "free"
|
|
|
|
|
|
def test_16_sigterm_runs_teardown_funnel_and_releases(lock_dir, pool):
    """Case 16: SIGTERM (the drone cancel path) still runs teardown and frees the lock.

    A guarded helper is spawned with a deadline argument of ``"3600"``, long enough that
    the deadline cannot be what ends it here. It is sent SIGTERM once it reports
    ``READY``. The exit code must be non-zero and equal to ``128 + SIGTERM``, the
    ``TEARDOWN`` marker must be written, and the lock on ``DOMAIN`` must be free.
    """
    helper, out_path = pool.spawn("guarded", DOMAIN, "3600")

    ready_line = wait_marker(out_path, "READY")
    assert ready_line

    helper.send_signal(signal.SIGTERM)
    exit_code = helper.wait(timeout=20)
    assert exit_code != 0, f"SIGTERM exit must be non-zero (got {exit_code})"

    # Shell-style "killed by signal" encoding: 128 plus the signal number.
    expected_code = 128 + signal.SIGTERM
    assert exit_code == expected_code, f"expected 143 (128+SIGTERM), got {exit_code}"

    teardown_line = wait_marker(out_path, "TEARDOWN", timeout=5)
    assert teardown_line, "teardown funnel did not run on SIGTERM"

    final_state = wait_lock_state(DOMAIN, "free")
    assert final_state == "free"
|