feat(2): Q4.7 plausible — §4.3 event-tracking functional tests + PARITY.md; /api/health readiness probe
- functional/test_event_tracking.py: 2 recipe-specific tests (P3) — register site → POST /api/event (browser UA) → read back from clickhouse events_v2. test_pageview_event_roundtrip asserts stored name/pathname/hostname; test_custom_event_roundtrip asserts a custom-named goal lands under that name. - test_health_check.py: probe /api/health (200, asserts clickhouse+postgres+sites_cache ready) — fixes the broken/unterminated docstring from the prior WIP edit; / is unreliable (500 init / 302 ready). - recipe_meta.py: HEALTH_PATH=/api/health, HEALTH_OK=(200,); comment corrected. - PARITY.md: P2 vacuous (no recipe-maintainer corpus); documents P3/P4 coverage. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
146
tests/plausible/functional/test_event_tracking.py
Normal file
146
tests/plausible/functional/test_event_tracking.py
Normal file
@ -0,0 +1,146 @@
|
||||
"""plausible — Phase-2 §4.3 recipe-specific functional tests (event tracking).
|
||||
|
||||
plausible's *raison d'être* is ingesting analytics events and storing them in ClickHouse. These two
|
||||
tests prove the full create-and-read-back path end to end — they are NOT health/200 stand-ins:
|
||||
|
||||
* test_pageview_event_roundtrip — POST a `pageview` to the public /api/event ingestion endpoint,
|
||||
then read the row back out of the ClickHouse `events_v2` table (the primary object: a tracked
|
||||
event). §4.3 "track a test event, query it back".
|
||||
* test_custom_event_roundtrip — POST a *custom-named* event (a goal/conversion, plausible's
|
||||
distinctive non-pageview tracking path) and confirm it lands under that name. Exercises a
|
||||
characteristic feature beyond the basic pageview.
|
||||
|
||||
Both assert real app state (the event reached the analytics store), not just the HTTP 202 ack.
|
||||
|
||||
plausible only ingests events for *known* sites — the in-memory `sites_cache` gates ingestion and
|
||||
drops events for unregistered domains (empirically confirmed: an event for an unregistered domain
|
||||
never appears in events_v2). So each test first registers a site row in the metadata postgres, then
|
||||
POSTs repeatedly while polling ClickHouse: the sites_cache must refresh to admit the new site and the
|
||||
event write-buffer must flush to ClickHouse, so the first landing is not instantaneous. Re-POSTing the
|
||||
same event is safe — we assert the row count is >= 1.
|
||||
|
||||
No recipe-maintainer corpus exists for plausible (recipe-info/plausible/ has no tests/), so these are
|
||||
net-new recipe-specific tests rather than parity ports — see tests/plausible/PARITY.md.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "..", "runner"))
|
||||
from harness import http as harness_http # noqa: E402
|
||||
from harness import lifecycle # noqa: E402
|
||||
|
||||
# A real browser User-Agent — plausible's ingestion drops requests from bot/library UAs (e.g. the
|
||||
# default python-urllib UA), so the event would silently never reach ClickHouse without this.
|
||||
_UA = (
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
|
||||
"(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||
)
|
||||
|
||||
|
||||
def _ch(domain: str, sql: str) -> str:
    """Execute `sql` with clickhouse-client inside the `plausible_events_db` service.

    The query is run against the `plausible_events_db` database and whatever
    `lifecycle.exec_in_app` returns for the command is handed back with surrounding whitespace
    stripped.
    """
    client_cmd = ["clickhouse-client", "--database", "plausible_events_db", "--query", sql]
    output = lifecycle.exec_in_app(domain, client_cmd, service="plausible_events_db")
    return output.strip()
|
||||
|
||||
|
||||
def _register_site(domain: str, site: str) -> None:
    """Insert a site row into the metadata postgres (`db` service) so plausible will ingest events
    for it.

    Idempotent: the INSERT uses ON CONFLICT (domain) DO NOTHING, and a trailing SELECT reads the
    row back so the call verifies the site exists whether or not it was inserted just now.

    Args:
        domain: deployed app domain passed to `lifecycle.exec_in_app`.
        site: the site domain to register (the value stored in `sites.domain`).

    Raises:
        AssertionError: if the read-back SELECT does not return `site`.
    """
    # Double any single quote so `site` can never terminate its SQL string literal early.
    site_literal = site.replace("'", "''")
    sql = (
        "INSERT INTO sites (domain, timezone, inserted_at, updated_at, native_stats_start_at) "
        f"VALUES ('{site_literal}','UTC', now(), now(), now()) ON CONFLICT (domain) DO NOTHING; "
        f"SELECT domain FROM sites WHERE domain = '{site_literal}';"
    )
    out = lifecycle.exec_in_app(
        domain, ["psql", "-U", "plausible", "-d", "plausible", "-tAc", sql], service="db"
    ).strip()
    # psql >= 15 prints the result of every statement in a multi-statement -c string, so the
    # INSERT's command tag (e.g. "INSERT 0 1") may precede the SELECT row; older clients print
    # only the last result. The SELECT is the final statement, so its row is always the last
    # non-empty line of output in both cases.
    result_lines = [line.strip() for line in out.splitlines() if line.strip()]
    registered = result_lines[-1] if result_lines else ""
    assert registered == site, f"site {site!r} not registered in postgres (got {out!r})"
|
||||
|
||||
|
||||
def _post_event(base_domain: str, site: str, name: str, pathname: str) -> int:
    """Send a single event to the public /api/event ingestion endpoint and return the HTTP status.

    plausible acknowledges an accepted event with 202. The payload names the event, the page URL
    (`https://{site}{pathname}`) and the site domain the event belongs to.
    """
    endpoint = f"https://{base_domain}/api/event"
    payload = {"name": name, "url": f"https://{site}{pathname}", "domain": site}
    # Browser UA so ingestion does not discard the request as bot/library traffic.
    # NOTE(review): 203.0.113.9 is in the RFC 5737 documentation range; presumably sent so the
    # event carries a fixed, public-looking client IP — confirm.
    request_headers = {"User-Agent": _UA, "X-Forwarded-For": "203.0.113.9"}
    status_code, _ = harness_http.http_post(
        endpoint,
        data=payload,
        headers=request_headers,
        timeout=15,
    )
    return status_code
|
||||
|
||||
|
||||
def _ingest_and_count(
    base_domain: str,
    site: str,
    name: str,
    pathname: str,
    max_wait: int = 210,
    interval: int = 10,
) -> int:
    """Register the site, then POST the event on a poll loop until its row appears in ClickHouse.

    Each iteration POSTs the event, sleeps `interval` seconds, then counts the matching
    `events_v2` rows. The event is re-POSTed every round because the first landing is not
    instantaneous: the sites_cache must admit the new site and the write buffer must flush.

    Args:
        base_domain: deployed app domain (HTTP host and exec target).
        site: site domain the event is attributed to; registered before the first POST.
        name: event name (`pageview` or a custom name).
        pathname: URL path of the event; together with `name` it identifies the rows counted.
        max_wait: seconds to keep polling before giving up.
        interval: seconds to sleep between a POST and the following ClickHouse count.

    Returns:
        The events_v2 row count for (pathname, name), always >= 1.

    Raises:
        AssertionError: if a POST is not acked with 202, or if nothing lands within max_wait —
            a genuinely-broken ingestion path therefore FAILS (this is not a vacuous check).
    """
    _register_site(base_domain, site)
    count_sql = (
        f"SELECT count() FROM events_v2 WHERE pathname = '{pathname}' AND name = '{name}'"
    )
    # Use the monotonic clock for the timeout: wall-clock time can jump (NTP step, manual change)
    # and would stretch or cut short the polling window.
    deadline = time.monotonic() + max_wait
    while True:
        last_status = _post_event(base_domain, site, name, pathname)
        assert last_status == 202, f"POST /api/event for {name!r} → HTTP {last_status} (expected 202)"
        time.sleep(interval)
        raw = _ch(base_domain, count_sql)
        # Anything that is not a plain number (empty output, an error message) counts as zero
        # rows so the loop keeps polling until the deadline instead of crashing on int().
        count = int(raw) if raw.isdigit() else 0
        if count >= 1:
            return count
        # The deadline is checked only after a full POST/sleep/count round, so the total wait can
        # exceed max_wait by up to one interval.
        if time.monotonic() >= deadline:
            raise AssertionError(
                f"event name={name!r} pathname={pathname!r} for site={site!r} never reached "
                f"ClickHouse events_v2 within {max_wait}s (last POST status={last_status}, "
                f"last count={count})"
            )
|
||||
|
||||
|
||||
def test_pageview_event_roundtrip(live_app):
    """Send a pageview through /api/event and read the stored row back from ClickHouse.

    Covers the §4.3 primary object (a tracked event): beyond the row count, the stored name,
    pathname and hostname must match what was sent.
    """
    tracked_site = "ccci-pageview.example"
    tracked_path = "/ccci-pageview-roundtrip"
    rows_found = _ingest_and_count(live_app, tracked_site, "pageview", tracked_path)
    assert rows_found >= 1, f"expected >=1 pageview row, got {rows_found}"

    # Read-back: the stored row must carry the data we sent (real app state, not just a count).
    readback_sql = (
        f"SELECT name, pathname, hostname FROM events_v2 "
        f"WHERE pathname = '{tracked_path}' AND name = 'pageview' LIMIT 1 FORMAT TabSeparated"
    )
    row = _ch(live_app, readback_sql)
    # Pad with empty strings so a short or empty row fails the asserts below with a readable
    # message rather than an unpacking error.
    columns = row.split("\t")
    columns.extend(["", "", ""])
    stored_name, stored_path, stored_host = columns[:3]
    assert stored_name == "pageview", f"stored event name {stored_name!r} != 'pageview'"
    assert stored_path == tracked_path, f"stored pathname {stored_path!r} != {tracked_path!r}"
    assert stored_host == tracked_site, f"stored hostname {stored_host!r} != site {tracked_site!r}"
|
||||
|
||||
|
||||
def test_custom_event_roundtrip(live_app):
    """Send a custom-named event (a goal/conversion, plausible's non-pageview tracking path) and
    check that ClickHouse stores it under that exact name rather than as a pageview."""
    goal_site = "ccci-goal.example"
    goal_path = "/ccci-custom-event"
    goal_name = "ccci-Signup"
    rows_found = _ingest_and_count(live_app, goal_site, goal_name, goal_path)
    assert rows_found >= 1, f"expected >=1 custom-event row, got {rows_found}"

    # Query by pathname only: the row must come back with the custom name (not coerced to
    # 'pageview'), which proves the custom-event/goal ingestion path works.
    name_sql = f"SELECT name FROM events_v2 WHERE pathname = '{goal_path}' LIMIT 1"
    stored_name = _ch(live_app, name_sql)
    assert stored_name == goal_name, f"custom event stored as {stored_name!r}, expected {goal_name!r}"
|
||||
@ -10,9 +10,13 @@ from harness import http as harness_http # noqa: E402
|
||||
|
||||
|
||||
def test_plausible_root_serves(live_app):
    """GET /api/health → 200 (clickhouse+postgres ready).

    `/` itself 500s via auth_controller under DISABLE_AUTH, so it is NOT a
    reliable health probe; the dedicated /api/health endpoint is.
    """
    url = f"https://{live_app}/api/health"
    # Retry for up to 60s (every 3s) until the endpoint answers 200; 302 is no longer accepted
    # because only a 200 from /api/health counts as healthy here.
    status, _ = harness_http.retry_http_get(
        url, expect_status=(200,), max_wait=60, interval=3
    )
    assert status == 200, f"GET {url} HTTP {status}"
|
||||
|
||||
Reference in New Issue
Block a user