All checks were successful
continuous-integration/drone/push Build is passing
tests/concurrency/ — NOT in the default `pytest tests/unit` gate; run explicitly with `pytest tests/concurrency -q`. flock/prctl/alarm are never mocked: helper subprocesses (helpers.py) hold real locks and install the real lifetime guards; locks live in a per-test tmp dir via CCCI_APP_LOCK_DIR; every helper (and recorded grandchild) is reaped by fixture cleanup. - test_locks.py (cases 1-4): SIGKILL auto-release; LOCK_NB held/unheld semantics; PEP 446 fd-not-inherited (holder's child survives, lock still releases); same-domain second acquire blocks until first holder exits. - test_janitor.py (cases 5-12): orphan reaped once + lockfile unlinked; live holder never reaped + logged; new-run acquire blocks until a slow reap completes (reap-under-probe-lock); two overlapping janitors -> exactly one reaps (flock arbitration); reboot sim (no lockfile) reaps immediately with no age wait; >120min-held lock flagged 'possible leaked run' and NOT stolen; warm/canonical names never probed (no lockfile even created); directory-as-lockfile and missing lock dir degrade to skip+log, never crash. - test_lifetime.py (cases 13-16): PDEATHSIG (wrapper parent SIGKILL'd -> guarded child TERM'd, teardown marker, lock released); already-orphaned helper REFUSES to run (ppid race); 2s deadline alarm -> teardown + exit 142 + lock released; SIGTERM -> teardown + exit 143 + lock released. - test_abra_dir.py (cases 17-19 + 18b): per-run dir built + $ABRA_DIR exported before the first abra call (recording stub abra on PATH); two CONCURRENT same-recipe fetch+checkout flows into different ABRA_DIRs -> divergent correct trees, canonical staged clone untouched; .env written through the servers/ symlink lands in the canonical path (env_get/env_set agree); manual runs get pid-suffixed dirs. On cc-ci: pytest tests/concurrency -q -> 20 passed; tests/unit -> 138 passed; lint PASS.
109 lines
3.5 KiB
Python
109 lines
3.5 KiB
Python
"""Shared utilities for the real-kernel concurrency suite (imported by the test modules; the
|
|
fixtures in conftest.py wrap these). No flock mocking anywhere — probes use real LOCK_NB."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import contextlib
|
|
import fcntl
|
|
import os
|
|
import signal
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
|
from harness import lifecycle # noqa: E402
|
|
|
|
# Path of the helpers.py script located next to this module; HelperPool.spawn runs it with
# sys.executable as a subprocess.
HELPERS = os.path.join(os.path.dirname(__file__), "helpers.py")
|
|
# Fixed sample domain name for the suite.
# NOTE(review): RUN_APP_RE is defined outside this file — confirm it still accepts this value.
DOMAIN = "test-abc123.ci.commoninternet.net" # matches RUN_APP_RE
|
|
|
|
|
|
class HelperPool:
    """Launch `helpers.py` subprocesses and make sure every one of them is cleaned up.

    Besides the direct children started by `spawn`, further pids (e.g. grandchildren recorded in
    the `hold-with-child`/`wrapper` markers) can be registered with `track_pid`; `cleanup`
    SIGKILLs those as well, so no child is left behind in the test VM.
    """

    def __init__(self, out_dir: str):
        # Directory in which each helper's marker file is placed.
        self.out_dir = out_dir
        # Popen handle of every helper started through spawn(), in start order.
        self.procs: list[subprocess.Popen] = []
        # Additional pids registered through track_pid().
        self.extra_pids: list[int] = []
        # Count of helpers spawned so far; used to number the marker files.
        self._n = 0

    def spawn(self, *args: str, env_extra: dict | None = None) -> tuple[subprocess.Popen, str]:
        """Run `helpers.py <args...>` as a child process; return `(proc, marker_file)`.

        The marker path is passed to the helper in `CCCI_HELPER_OUT`. Entries of `env_extra`
        are added on top of the inherited environment. The helper's stdout and stderr are
        both sent to /dev/null.
        """
        self._n += 1
        marker_file = os.path.join(self.out_dir, f"helper-{self._n}.out")
        overrides = env_extra or {}
        child_env = dict(os.environ, CCCI_HELPER_OUT=marker_file, **overrides)
        command = [sys.executable, HELPERS, *args]
        helper = subprocess.Popen(  # noqa: S603
            command,
            env=child_env,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.STDOUT,
        )
        self.procs.append(helper)
        return helper, marker_file

    def track_pid(self, pid: int) -> None:
        """Register an extra pid that `cleanup` must SIGKILL."""
        self.extra_pids.append(pid)

    def cleanup(self) -> None:
        """Kill every helper that is still running, reap it, then SIGKILL all tracked pids."""
        for helper in self.procs:
            still_running = helper.poll() is None
            if still_running:
                helper.kill()
            try:
                # Reap the child; give up quietly if it has not exited within 10 s.
                helper.wait(timeout=10)
            except subprocess.TimeoutExpired:
                pass
        for tracked in self.extra_pids:
            try:
                os.kill(tracked, signal.SIGKILL)
            except OSError:
                # Best effort: any OSError from os.kill (e.g. the pid is already gone) is ignored.
                pass
|
|
|
|
|
|
def wait_marker(out: str, token: str, timeout: float = 15.0) -> str | None:
    """Poll a helper's marker file for a line containing `token`.

    The file is always read at least once (even with `timeout <= 0`) and once more after the
    final sleep, so a marker written just before the deadline is still seen.

    Args:
        out: Path of the marker file; it does not need to exist yet.
        token: Substring to look for in each line.
        timeout: Maximum time to keep polling, in seconds.

    Returns:
        The first line containing `token`, stripped, or None if none appeared in time.
    """
    # Monotonic clock: a wall-clock step must neither stretch nor cut the wait.
    deadline = time.monotonic() + timeout
    while True:
        try:
            with open(out) as f:
                for line in f:
                    if token in line:
                        return line.strip()
        except OSError:
            # Marker not readable (e.g. the helper has not created it yet); retry.
            pass
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return None
        # Never sleep past the deadline.
        time.sleep(min(0.1, remaining))
|
|
|
|
|
|
def lock_state(domain: str) -> str:
    """Report 'held' | 'free' | 'absent' for the domain's lockfile, probed with a REAL LOCK_NB.

    Returns:
        'absent' when the lockfile does not exist, 'held' when the non-blocking exclusive
        flock is refused, 'free' when it is granted (the probe's own lock is dropped again
        before returning).

    The probe never creates the lockfile: it is opened without O_CREAT, so a file that is
    unlinked between the existence check and the open is reported as 'absent' instead of
    being silently re-created.
    """
    path = lifecycle._app_lock_path(domain)  # noqa: SLF001
    if not os.path.exists(path):
        return "absent"
    try:
        # Write/append access as before, but deliberately without O_CREAT.
        fd = os.open(path, os.O_WRONLY | os.O_APPEND)
    except FileNotFoundError:
        # Unlinked after the exists() check above.
        return "absent"
    try:
        fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except BlockingIOError:
        return "held"
    else:
        return "free"
    finally:
        # Closing the descriptor also releases the probe's lock if it was granted.
        os.close(fd)
|
|
|
|
|
|
def wait_lock_state(domain: str, want: str, timeout: float = 10.0) -> str:
    """Poll until `lock_state(domain) == want` or `timeout` seconds have passed.

    Kernel release on process death is fast, but this gives the scheduler room. The state is
    probed immediately and then every 0.1 s.

    Returns:
        The final observed state, which differs from `want` if the wait timed out.
    """
    # Monotonic clock so a wall-clock adjustment cannot distort the timeout.
    deadline = time.monotonic() + timeout
    state = lock_state(domain)
    while state != want and time.monotonic() < deadline:
        time.sleep(0.1)
        state = lock_state(domain)
    return state
|
|
|
|
|
|
def pid_alive(pid: int) -> bool:
    """Return whether a `/proc/<pid>` entry currently exists for `pid`."""
    proc_entry = f"/proc/{pid}"
    return os.path.exists(proc_entry)
|
|
|
|
|
|
def wait_pid_gone(pid: int, timeout: float = 15.0) -> bool:
    """Wait until `pid_alive(pid)` turns false.

    The pid is always checked at least once (even with `timeout <= 0`) and once more after
    the final sleep, so an exit just before the deadline is still reported.

    Args:
        pid: Process id to watch.
        timeout: Maximum time to wait, in seconds.

    Returns:
        True once the pid is no longer alive, False if it was still alive at the deadline.
    """
    # Monotonic clock so a wall-clock adjustment cannot distort the timeout.
    deadline = time.monotonic() + timeout
    while pid_alive(pid):
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return False
        # Never sleep past the deadline.
        time.sleep(min(0.1, remaining))
    return True
|