Files
cc-ci/tests/concurrency/concutil.py
autonomic-bot 84d90fb655
All checks were successful
continuous-integration/drone/push Build is passing
test(concurrency): real-kernel suite for the restructured model — 20 tests, 19 plan cases
tests/concurrency/ — NOT in the default `pytest tests/unit` gate; run explicitly with
`pytest tests/concurrency -q`. flock/prctl/alarm are never mocked: helper subprocesses
(helpers.py) hold real locks and install the real lifetime guards; locks live in a per-test
tmp dir via CCCI_APP_LOCK_DIR; every helper (and recorded grandchild) is reaped by fixture
cleanup.

- test_locks.py (cases 1-4): SIGKILL auto-release; LOCK_NB held/unheld semantics; PEP 446
  fd-not-inherited (holder's child survives, lock still releases); same-domain second acquire
  blocks until first holder exits.
- test_janitor.py (cases 5-12): orphan reaped once + lockfile unlinked; live holder never
  reaped + logged; new-run acquire blocks until a slow reap completes (reap-under-probe-lock);
  two overlapping janitors -> exactly one reaps (flock arbitration); reboot sim (no lockfile)
  reaps immediately with no age wait; >120min-held lock flagged 'possible leaked run' and NOT
  stolen; warm/canonical names never probed (no lockfile even created); directory-as-lockfile
  and missing lock dir degrade to skip+log, never crash.
- test_lifetime.py (cases 13-16): PDEATHSIG (wrapper parent SIGKILL'd -> guarded child TERM'd,
  teardown marker, lock released); already-orphaned helper REFUSES to run (ppid race); 2s
  deadline alarm -> teardown + exit 142 + lock released; SIGTERM -> teardown + exit 143 +
  lock released.
- test_abra_dir.py (cases 17-19 + 18b): per-run dir built + $ABRA_DIR exported before the
  first abra call (recording stub abra on PATH); two CONCURRENT same-recipe fetch+checkout
  flows into different ABRA_DIRs -> divergent correct trees, canonical staged clone untouched;
  .env written through the servers/ symlink lands in the canonical path (env_get/env_set
  agree); manual runs get pid-suffixed dirs.

On cc-ci: pytest tests/concurrency -q -> 20 passed; tests/unit -> 138 passed; lint PASS.
2026-06-10 04:29:36 +00:00

109 lines
3.5 KiB
Python

"""Shared utilities for the real-kernel concurrency suite (imported by the test modules; the
fixtures in conftest.py wrap these). No flock mocking anywhere — probes use real LOCK_NB."""
from __future__ import annotations
import contextlib
import fcntl
import os
import signal
import subprocess
import sys
import time
# Put <this dir>/../../runner at the front of sys.path before importing `harness`
# (presumably that is where the `harness` package lives — confirm against the repo layout).
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
# Imported after the sys.path tweak above, hence the E402 suppression.
from harness import lifecycle # noqa: E402
# Path of the helper script, a sibling of this file; HelperPool.spawn runs it with sys.executable.
HELPERS = os.path.join(os.path.dirname(__file__), "helpers.py")
DOMAIN = "test-abc123.ci.commoninternet.net" # matches RUN_APP_RE
class HelperPool:
    """Spawns helpers.py subprocesses and GUARANTEES their cleanup (incl. recorded grandchild
    pids from `hold-with-child`/`wrapper` markers) — no leaked children in the test VM.

    Attributes are plain and public so tests can inspect them:
      * ``out_dir``    — directory in which per-helper marker files are created.
      * ``procs``      — every ``subprocess.Popen`` started through :meth:`spawn`.
      * ``extra_pids`` — pids recorded via :meth:`track_pid` that still await a SIGKILL.
    """

    def __init__(self, out_dir: str):
        self.out_dir = out_dir
        self.procs: list[subprocess.Popen] = []
        self.extra_pids: list[int] = []
        self._n = 0  # running counter used to give each helper a unique marker file

    def spawn(self, *args: str, env_extra: dict | None = None) -> tuple[subprocess.Popen, str]:
        """Start `helpers.py <args...>`; returns (proc, marker_file).

        The marker-file path is handed to the helper through the ``CCCI_HELPER_OUT``
        environment variable; ``env_extra`` entries are layered on top of the inherited
        environment. The helper's stdout and stderr are discarded.
        """
        self._n += 1
        out = os.path.join(self.out_dir, f"helper-{self._n}.out")
        env = dict(os.environ, CCCI_HELPER_OUT=out, **(env_extra or {}))
        p = subprocess.Popen(  # noqa: S603
            [sys.executable, HELPERS, *args],
            env=env,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.STDOUT,
        )
        self.procs.append(p)
        return p, out

    def track_pid(self, pid: int) -> None:
        """Record an additional pid to be SIGKILLed by :meth:`cleanup`."""
        self.extra_pids.append(pid)

    def cleanup(self) -> None:
        """Kill and reap every spawned helper, then SIGKILL every recorded extra pid.

        Safe to call more than once: recorded extra pids are forgotten once they have been
        signalled, so a repeated call can never SIGKILL an unrelated process that has since
        been handed a recycled pid number. ``procs`` is left intact so callers can still
        read return codes afterwards.
        """
        for p in self.procs:
            if p.poll() is None:
                p.kill()
            # Reap the child; a helper that somehow outlives SIGKILL for 10s is left behind
            # rather than hanging the fixture teardown.
            with contextlib.suppress(subprocess.TimeoutExpired):
                p.wait(timeout=10)
        for pid in self.extra_pids:
            # Best effort: the pid may already be gone (ProcessLookupError is an OSError).
            with contextlib.suppress(OSError):
                os.kill(pid, signal.SIGKILL)
        # These are not our children, so they cannot be waited on; drop them so they are
        # signalled exactly once.
        self.extra_pids.clear()
def wait_marker(out: str, token: str, timeout: float = 15.0) -> str | None:
    """Poll a helper's marker file for a line containing `token`; returns the line or None.

    The file is always checked at least once — even with ``timeout <= 0`` — and once more
    when the deadline is reached, so a marker written during the final sleep is not missed.
    A missing or unreadable file is treated as "not written yet". The returned line has
    surrounding whitespace stripped.
    """
    # Monotonic clock: the timeout must not stretch or shrink if the wall clock is adjusted.
    deadline = time.monotonic() + timeout
    while True:
        try:
            with open(out) as f:
                for line in f:
                    if token in line:
                        return line.strip()
        except OSError:
            pass  # marker file not created yet (or transiently unreadable); keep polling
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return None
        # Never sleep past the deadline.
        time.sleep(min(0.1, remaining))
def lock_state(domain: str) -> str:
    """'held' | 'free' | 'absent' for the domain's lockfile, probed with a REAL LOCK_NB.

    The probe never creates the lockfile: it is opened write-only WITHOUT ``O_CREAT``, so a
    lockfile that is missing — or that is unlinked while the probe is in flight — is
    reported as 'absent' instead of being silently recreated. When the result is 'free' the
    probe briefly holds the exclusive lock itself; it is released when the descriptor is
    closed on return. Other open errors (e.g. the path is a directory) propagate.
    """
    path = lifecycle._app_lock_path(domain)  # noqa: SLF001
    try:
        fd = os.open(path, os.O_WRONLY)
    except (FileNotFoundError, NotADirectoryError):
        # No such file, or a parent path component is not a directory.
        return "absent"
    try:
        fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except BlockingIOError:
        # Someone else holds the lock; LOCK_NB makes flock fail instead of blocking.
        return "held"
    else:
        return "free"
    finally:
        os.close(fd)
def wait_lock_state(domain: str, want: str, timeout: float = 10.0) -> str:
    """Poll until lock_state(domain) == want (kernel release on process death is fast, but give
    the scheduler room). Returns the final observed state.

    The state is probed once up front, so a matching state is returned immediately even with
    ``timeout <= 0``. If the deadline passes first, the last observed (non-matching) state is
    returned — callers compare the return value against ``want``.
    """
    # Monotonic clock so wall-clock adjustments cannot skew the timeout.
    deadline = time.monotonic() + timeout
    state = lock_state(domain)
    while state != want and time.monotonic() < deadline:
        time.sleep(0.1)
        state = lock_state(domain)
    return state
def pid_alive(pid: int) -> bool:
    """Report whether a ``/proc/<pid>`` entry currently exists for ``pid``."""
    proc_entry = f"/proc/{pid}"
    return os.path.exists(proc_entry)
def wait_pid_gone(pid: int, timeout: float = 15.0) -> bool:
    """Poll until ``pid_alive(pid)`` is false; returns True if that happened before the deadline.

    The pid is always checked at least once — even with ``timeout <= 0`` — and once more
    when the deadline is reached, so a process that exits during the final sleep still
    counts as gone. Returns False if the pid is still alive at the deadline.
    """
    # Monotonic clock so wall-clock adjustments cannot skew the timeout.
    deadline = time.monotonic() + timeout
    while pid_alive(pid):
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return False
        # Never sleep past the deadline.
        time.sleep(min(0.1, remaining))
    return True