Files
cc-ci/tests/concurrency/test_locks.py
autonomic-bot 84d90fb655
All checks were successful
continuous-integration/drone/push Build is passing
test(concurrency): real-kernel suite for the restructured model — 20 tests, 19 plan cases
tests/concurrency/ — NOT in the default `pytest tests/unit` gate; run explicitly with
`pytest tests/concurrency -q`. flock/prctl/alarm are never mocked: helper subprocesses
(helpers.py) hold real locks and install the real lifetime guards; locks live in a per-test
tmp dir via CCCI_APP_LOCK_DIR; every helper (and recorded grandchild) is reaped by fixture
cleanup.

- test_locks.py (cases 1-4): SIGKILL auto-release; LOCK_NB held/unheld semantics; PEP 446
  fd-not-inherited (holder's child survives, lock still releases); same-domain second acquire
  blocks until first holder exits.
- test_janitor.py (cases 5-12): orphan reaped once + lockfile unlinked; live holder never
  reaped + logged; new-run acquire blocks until a slow reap completes (reap-under-probe-lock);
  two overlapping janitors -> exactly one reaps (flock arbitration); reboot sim (no lockfile)
  reaps immediately with no age wait; >120min-held lock flagged 'possible leaked run' and NOT
  stolen; warm/canonical names never probed (no lockfile even created); directory-as-lockfile
  and missing lock dir degrade to skip+log, never crash.
- test_lifetime.py (cases 13-16): PDEATHSIG (wrapper parent SIGKILL'd -> guarded child TERM'd,
  teardown marker, lock released); already-orphaned helper REFUSES to run (ppid race); 2s
  deadline alarm -> teardown + exit 142 + lock released; SIGTERM -> teardown + exit 143 +
  lock released.
- test_abra_dir.py (cases 17-19 + 18b): per-run dir built + $ABRA_DIR exported before the
  first abra call (recording stub abra on PATH); two CONCURRENT same-recipe fetch+checkout
  flows into different ABRA_DIRs -> divergent correct trees, canonical staged clone untouched;
  .env written through the servers/ symlink lands in the canonical path (env_get/env_set
  agree); manual runs get pid-suffixed dirs.

On cc-ci: pytest tests/concurrency -q -> 20 passed; tests/unit -> 138 passed; lint PASS.
2026-06-10 04:29:36 +00:00

86 lines
3.3 KiB
Python

"""Lock fundamentals (concurrency-restructure plan, cases 1-4). Real kernel flocks held by real
subprocesses — nothing mocked."""
from __future__ import annotations
import fcntl
import os
import sys
import time
# Put this test file's own directory at the front of the import path so modules sitting
# next to it can be imported by bare name.
# NOTE(review): presumably this is what makes ``concutil`` importable below — confirm.
sys.path.insert(0, os.path.dirname(__file__))
# Also put <this dir>/../../runner at the front of the import path.
# NOTE(review): presumably the ``harness`` package lives under runner/ — confirm.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from concutil import ( # noqa: E402
DOMAIN,
lock_state,
wait_lock_state,
wait_marker,
)
from harness import lifecycle # noqa: E402
def test_1_sigkill_releases_lock(lock_dir, pool):
    """Case 1: a killed holder's lock becomes acquirable again (kernel auto-release).

    Spawns a ``hold`` helper on DOMAIN, waits for its ACQUIRED marker, checks the lock is
    reported ``held``, kills the helper and then expects the lock state to reach ``free``.
    This is the property the old pidfile registry approximated with /proc checks.
    """
    holder, holder_out = pool.spawn("hold", DOMAIN)
    acquired_marker = wait_marker(holder_out, "ACQUIRED")
    assert acquired_marker, "holder never acquired"

    state_while_alive = lock_state(DOMAIN)
    assert state_while_alive == "held"

    # Kill the holder and wait for it to exit before probing the lock again.
    holder.kill()
    holder.wait(timeout=10)

    state_after_kill = wait_lock_state(DOMAIN, "free")
    assert state_after_kill == "free"
def test_2_nb_probe_held_vs_unheld(lock_dir, pool):
    """Case 2: a LOCK_NB probe fails against a held lock and succeeds once it is unheld.

    While a ``hold`` helper owns the DOMAIN lock, a non-blocking exclusive flock on the same
    lockfile must raise BlockingIOError. After the helper is killed and the lock is reported
    free, the same probe must go through without raising.
    """
    holder, holder_out = pool.spawn("hold", DOMAIN)
    assert wait_marker(holder_out, "ACQUIRED")

    lock_path = lifecycle._app_lock_path(DOMAIN)  # noqa: SLF001
    nonblocking_exclusive = fcntl.LOCK_EX | fcntl.LOCK_NB

    # Phase 1: the helper holds the lock, so the probe has to be refused.
    with open(lock_path, "a") as probe:
        try:
            fcntl.flock(probe, nonblocking_exclusive)
        except BlockingIOError:
            pass
        else:
            raise AssertionError("LOCK_NB succeeded against a held lock")

    holder.kill()
    holder.wait(timeout=10)
    released_state = wait_lock_state(DOMAIN, "free")
    assert released_state == "free"

    # Phase 2: nobody holds the lock any more; the probe must not raise now.
    with open(lock_path, "a") as probe:
        fcntl.flock(probe, nonblocking_exclusive)
def test_3_lock_fd_not_inherited_by_children(lock_dir, pool):
    """Case 3 (PEP 446): the lock is released on holder death even though its child lives on.

    The ``hold-with-child`` helper acquires the lock and reports a child pid on a CHILD
    marker line (second whitespace-separated field). The holder is then killed; the child
    must still have a /proc entry, and the lock must nevertheless become free — i.e. the
    child never inherited the lock fd. This is what makes 'held lock == live HARNESS owner'
    sound even though runs spawn abra/docker/pytest children.
    """
    holder, holder_out = pool.spawn("hold-with-child", DOMAIN)
    assert wait_marker(holder_out, "ACQUIRED")

    child_report = wait_marker(holder_out, "CHILD")
    assert child_report, "holder never reported its child pid"
    report_fields = child_report.split()
    child_pid = int(report_fields[1])
    # Register the grandchild with the pool so it is tracked alongside the spawned helpers.
    pool.track_pid(child_pid)

    holder.kill()
    holder.wait(timeout=10)

    child_proc_entry = f"/proc/{child_pid}"
    child_alive = os.path.exists(child_proc_entry)
    assert child_alive, "child should outlive the holder"

    state_after_holder_death = wait_lock_state(DOMAIN, "free")
    assert (
        state_after_holder_death == "free"
    ), "lock must release on holder death even with a live child (PEP 446 non-inheritable fd)"
def test_4_second_acquire_blocks_until_first_exits(lock_dir, pool):
    """Case 4: a second same-domain acquire waits until the first holder has exited.

    This is the double-!testme serialisation property. Two ``hold`` helpers are spawned on
    DOMAIN; the second must not emit ACQUIRED while the first is alive. After the first is
    killed, the second must emit ACQUIRED, its reported timestamp (second field of the marker
    line) must not precede the kill by more than 0.05s, and the lock must be ``held`` again.
    """
    first, first_out = pool.spawn("hold", DOMAIN)
    assert wait_marker(first_out, "ACQUIRED")

    _second, second_out = pool.spawn("hold", DOMAIN)

    # Give the second helper ample time to (wrongly) acquire, then confirm it did not.
    time.sleep(1.5)
    premature_marker = wait_marker(second_out, "ACQUIRED", timeout=0.1)
    assert premature_marker is None, "second acquire did not block"

    # Record the wall clock just before the kill so the acquire can be ordered against it.
    kill_started_at = time.time()
    first.kill()
    first.wait(timeout=10)

    acquired_marker = wait_marker(second_out, "ACQUIRED", timeout=15)
    assert acquired_marker, "second acquire never completed after first holder exited"

    second_acquired_at = float(acquired_marker.split()[1])
    earliest_allowed = kill_started_at - 0.05  # small tolerance on the timestamp comparison
    assert second_acquired_at >= earliest_allowed, "second holder acquired before the first exited"

    state_with_second_holder = lock_state(DOMAIN)
    assert state_with_second_holder == "held"