"""plausible — Phase-2 §4.3 recipe-specific functional tests (event tracking). plausible's *raison d'être* is ingesting analytics events and storing them in ClickHouse. These two tests prove the full create-and-read-back path end to end — they are NOT health/200 stand-ins: * test_pageview_event_roundtrip — POST a `pageview` to the public /api/event ingestion endpoint, then read the row back out of the ClickHouse `events_v2` table (the primary object: a tracked event). §4.3 "track a test event, query it back". * test_custom_event_roundtrip — POST a *custom-named* event (a goal/conversion, plausible's distinctive non-pageview tracking path) and confirm it lands under that name. Exercises a characteristic feature beyond the basic pageview. Both assert real app state (the event reached the analytics store), not just the HTTP 202 ack. plausible only ingests events for *known* sites — the in-memory `sites_cache` gates ingestion and drops events for unregistered domains (empirically confirmed: an event for an unregistered domain never appears in events_v2). So each test first registers a site row in the metadata postgres, then POSTs repeatedly while polling ClickHouse: the sites_cache must refresh to admit the new site and the event write-buffer must flush to ClickHouse, so the first landing is not instantaneous. Re-POSTing the same event is safe — we assert the row count is >= 1. No recipe-maintainer corpus exists for plausible (recipe-info/plausible/ has no tests/), so these are net-new recipe-specific tests rather than parity ports — see tests/plausible/PARITY.md. """ from __future__ import annotations import os import sys import time sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "..", "runner")) from harness import http as harness_http # noqa: E402 from harness import lifecycle # noqa: E402 # A real browser User-Agent — plausible's ingestion drops requests from bot/library UAs (e.g. the # default python-urllib UA), so the event would silently never reach ClickHouse without this. _UA = ( "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 " "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" ) def _ch(domain: str, sql: str) -> str: """Run a ClickHouse query against the `plausible_events_db` service; return stdout (stripped).""" return lifecycle.exec_in_app( domain, ["clickhouse-client", "--database", "plausible_events_db", "--query", sql], service="plausible_events_db", ).strip() def _register_site(domain: str, site: str) -> None: """Insert a site row into the metadata postgres (`db` service) so plausible will ingest events for it. Idempotent (ON CONFLICT DO NOTHING).""" sql = ( "INSERT INTO sites (domain, timezone, inserted_at, updated_at, native_stats_start_at) " f"VALUES ('{site}','UTC', now(), now(), now()) ON CONFLICT (domain) DO NOTHING; " f"SELECT domain FROM sites WHERE domain = '{site}';" ) out = lifecycle.exec_in_app( domain, ["psql", "-q", "-U", "plausible", "-d", "plausible", "-tAc", sql], service="db" ).strip() assert out == site, f"site {site!r} not registered in postgres (got {out!r})" def _post_event(base_domain: str, site: str, name: str, pathname: str) -> int: """POST one event to the public ingestion endpoint; return the HTTP status (plausible acks 202).""" status, _ = harness_http.http_post( f"https://{base_domain}/api/event", data={"name": name, "url": f"https://{site}{pathname}", "domain": site}, headers={"User-Agent": _UA, "X-Forwarded-For": "203.0.113.9"}, timeout=15, ) return status def _ingest_and_count( base_domain: str, site: str, name: str, pathname: str, max_wait: int = 210, interval: int = 10, ) -> int: """Register the site, then POST the event on a poll loop until its row appears in ClickHouse. Returns the events_v2 row count for (pathname, name). Raises if nothing lands within max_wait — a genuinely-broken ingestion path therefore FAILS (this is not a vacuous check).""" _register_site(base_domain, site) count_sql = f"SELECT count() FROM events_v2 WHERE pathname = '{pathname}' AND name = '{name}'" deadline = time.time() + max_wait last_status = None while True: last_status = _post_event(base_domain, site, name, pathname) assert ( last_status == 202 ), f"POST /api/event for {name!r} → HTTP {last_status} (expected 202)" time.sleep(interval) raw = _ch(base_domain, count_sql) count = int(raw) if raw.isdigit() else 0 if count >= 1: return count if time.time() >= deadline: raise AssertionError( f"event name={name!r} pathname={pathname!r} for site={site!r} never reached " f"ClickHouse events_v2 within {max_wait}s (last POST status={last_status}, " f"last count={count})" ) def test_pageview_event_roundtrip(live_app): """Track a pageview event via /api/event, read it back from ClickHouse (§4.3 primary object).""" site = "ccci-pageview.example" pathname = "/ccci-pageview-roundtrip" count = _ingest_and_count(live_app, site, "pageview", pathname) assert count >= 1, f"expected >=1 pageview row, got {count}" # Read-back: confirm the stored row carries the data we sent (real app state, not just a count). row = _ch( live_app, f"SELECT name, pathname, hostname FROM events_v2 " f"WHERE pathname = '{pathname}' AND name = 'pageview' LIMIT 1 FORMAT TabSeparated", ) name, stored_path, hostname = (row.split("\t") + ["", "", ""])[:3] assert name == "pageview", f"stored event name {name!r} != 'pageview'" assert stored_path == pathname, f"stored pathname {stored_path!r} != {pathname!r}" assert hostname == site, f"stored hostname {hostname!r} != site {site!r}" def test_custom_event_roundtrip(live_app): """Track a custom-named event (a goal/conversion — plausible's distinctive non-pageview path) and confirm it lands under that exact name in ClickHouse, distinct from the pageview path.""" site = "ccci-goal.example" pathname = "/ccci-custom-event" event_name = "ccci-Signup" count = _ingest_and_count(live_app, site, event_name, pathname) assert count >= 1, f"expected >=1 custom-event row, got {count}" # The row must be stored under the custom name (not coerced to 'pageview') — proves the # custom-event/goal ingestion path works. stored_name = _ch( live_app, f"SELECT name FROM events_v2 WHERE pathname = '{pathname}' LIMIT 1", ) assert ( stored_name == event_name ), f"custom event stored as {stored_name!r}, expected {event_name!r}"