Mechanisms we add to the flow
- Audit log: who ran what, when, from which source
- Schema contract: the columns and types a step promises
- Lineage: trace an output back to its inputs
def append_audit(event):
    """Append one provenance entry for a pipeline step to the audit log.

    The entry is a copy of ``event`` with a ``"ts"`` key set to the value
    returned by ``now_iso()`` (replacing any ``"ts"`` already in ``event``).
    It is serialized as a single JSON line and appended to ``AUDIT_LOG``.

    Args:
        event: Mapping describing the step; its values must be
            JSON-serializable or ``json.dumps`` will raise.
    """
    entry = {**event}
    entry["ts"] = now_iso()
    # Append mode: existing log lines are kept, one JSON document per line.
    with open(AUDIT_LOG, "a") as log_file:
        log_file.write(f"{json.dumps(entry)}\n")
# Schema contract: the columns (name -> type name) a step promises.
# NOTE(review): "source" looks like the path of the dataset the contract
# applies to, and "retention_days" like a retention period -- confirm
# against whatever consumes this dict.
contract = dict(
    columns=dict(key="str", count="int"),
    source="curated/aggregate.parquet",
    schema_version=1,
    retention_days=365,
)