from __future__ import annotations
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field, ConfigDict, model_validator
from sim_panel.schema.types import JSONObject, PolicyName, ColumnSpec, EventType, JSONValue
# Version string of this event schema; used as the default value of
# EventV0_1_0.schema_version.
SCHEMA_VERSION = "0.1.0"
def _unwrap_jsonvalue(x: Any) -> Any:
    """Peel off every ``JSONValue`` wrapper around *x*.

    As long as the current object is a ``JSONValue`` instance it is replaced
    by its ``.root`` attribute; the first object that is not a ``JSONValue``
    is returned. Values that are not wrapped at all come back unchanged.
    """
    current = x
    while True:
        if not isinstance(current, JSONValue):
            return current
        current = current.root
def _unwrap_json_tree(x: Any) -> Any:
    """Recursively strip ``JSONValue`` wrappers from a nested structure.

    The top-level value is unwrapped first. If the result is a ``dict`` or a
    ``list``, a new container of the same kind is built whose values/items
    have been unwrapped the same way (dict keys are kept as they are). Any
    other value is returned directly.
    """
    node = _unwrap_jsonvalue(x)
    if isinstance(node, dict):
        return {name: _unwrap_json_tree(child) for name, child in node.items()}
    if isinstance(node, list):
        return [_unwrap_json_tree(item) for item in node]
    return node
def _get_optional_list_of_str(d: Dict[str, Any], key: str) -> Optional[List[str]]:
value = d.get(key)
if value is None:
return None
if not isinstance(value, list) or any(not isinstance(v, str) for v in value):
raise ValueError(f"traces.{key} must be a list of strings if provided.")
return value
# Row-level event model for schema v0.1.0 (selection and evaluation events).
class EventV0_1_0(BaseModel):
    """
    v0.1.0 supports two event types:
    1) selection (policy == "self_selection" only)
    - A choice set is presented at (panelist_id, t).
    - The panelist selects a subset (possibly empty).
    - This row records choice_set + selected_product_ids.
    2) evaluation (policy in {"random","manual","self_selection"})
    - A single (panelist_id, product_id, t) evaluation.
    - Records product_display plus flexible outcomes/traces.
    Note:
    - selection rows are only allowed when policy=="self_selection".
    - for self_selection evaluation rows, selection_id is required to link back.
    """

    # Unknown keys in the input are a validation error rather than being ignored.
    model_config = ConfigDict(extra="forbid")

    # --- schema version and row identity ---
    schema_version: str = Field(default=SCHEMA_VERSION)
    event_id: str = Field(
        ...,
        description="Deterministic unique id for the event row.",
    )

    # --- what kind of row this is, and under which policy it was produced ---
    event_type: EventType = Field(
        ...,
        description="Event type: selection|evaluation.",
    )
    policy: PolicyName = Field(
        ...,
        description="Policy: random|manual|self_selection.",
    )

    # --- panel / time keys ---
    panelist_id: str
    t: int = Field(
        ...,
        ge=0,
        description="Period index (0-based).",
    )

    # --- link from an evaluation row back to its selection row ---
    selection_id: Optional[str] = Field(
        default=None,
        description="Deterministic id for the selection event (required for self_selection evaluation rows).",
    )

    # --- payload carried by selection rows ---
    choice_set: Optional[List[str]] = Field(
        default=None,
        description="Choice set (list of product_ids) presented to the panelist (selection rows).",
    )
    selected_product_ids: Optional[List[str]] = Field(
        default=None,
        description="Product ids requested by the panelist (selection rows; may be empty). May differ from executed ids in traces.",
    )

    # --- payload carried by evaluation rows ---
    product_id: Optional[str] = Field(
        default=None,
        description="Evaluated product_id (required for evaluation rows).",
    )
    product_display: Optional[str] = Field(
        default=None,
        description="Panelist-facing display text (required for evaluation rows).",
    )

    # --- free-form feature payloads; default to an empty JSON object ---
    panelist_features: JSONObject = Field(default_factory=dict)
    product_features: JSONObject = Field(default_factory=dict)

    # --- free-form outcomes and traces ---
    outcomes: Optional[JSONObject] = Field(
        default=None,
        description="Outcome(s) as JSON, e.g. {'rating': 4.2} or {'rating':4.2,'quality':3.8}.",
    )
    traces: Optional[JSONObject] = Field(
        default=None,
        description="Trace(s) as JSON, e.g. {'review_text':'...', 'rationale':'...'}",
    )

    @model_validator(mode="after")
    def _cross_field_rules(self) -> "EventV0_1_0":
        """Check the constraints that involve more than one field.

        Selection rows must use the ``self_selection`` policy, must carry
        ``choice_set`` and ``selected_product_ids`` (without duplicates) and
        must not carry ``product_id``, ``product_display`` or ``outcomes``.
        When ``traces`` contains ``executed_product_ids`` /
        ``dropped_product_ids`` they must be duplicate-free lists of strings,
        executed ids must all be in ``choice_set``, and the two lists must
        not overlap.

        Evaluation rows must carry ``product_id`` and ``product_display``,
        must not carry the selection payload, and need ``selection_id`` when
        the policy is ``self_selection``.

        Returns:
            The validated instance itself.

        Raises:
            ValueError: On the first rule that is violated.
        """
        if self.event_type == "selection":
            # The selection stage exists only under the self_selection policy.
            if self.policy != "self_selection":
                raise ValueError("event_type='selection' is only allowed when policy=='self_selection'.")

            if self.choice_set is None:
                raise ValueError("selection event requires choice_set.")
            if self.selected_product_ids is None:
                raise ValueError("selection event requires selected_product_ids (can be empty list).")

            # Fields that belong to evaluation rows have to stay unset here.
            evaluation_only = (self.product_id, self.product_display, self.outcomes)
            if not all(field is None for field in evaluation_only):
                raise ValueError("selection event must not include product_id/product_display/outcomes.")

            # Membership in choice_set is enforced for the executed ids found
            # in traces, not for the raw requested ids.
            allowed = set(self.choice_set)
            if len(set(self.selected_product_ids)) != len(self.selected_product_ids):
                raise ValueError("selected_product_ids must not contain duplicates.")

            unwrapped = {} if self.traces is None else _unwrap_json_tree(self.traces)
            trace_map = unwrapped if isinstance(unwrapped, dict) else {}

            # Both lists are fetched (and type-checked) before any further rule runs.
            executed = _get_optional_list_of_str(trace_map, "executed_product_ids")
            dropped = _get_optional_list_of_str(trace_map, "dropped_product_ids")

            if executed is not None:
                if len(set(executed)) != len(executed):
                    raise ValueError("traces.executed_product_ids must not contain duplicates.")
                # Entries are known to be strings here, so None is a safe "nothing found" marker.
                stray = next((pid for pid in executed if pid not in allowed), None)
                if stray is not None:
                    raise ValueError(f"executed_product_ids contains {stray!r} not in choice_set.")

            if dropped is not None and len(set(dropped)) != len(dropped):
                raise ValueError("traces.dropped_product_ids must not contain duplicates.")

            if executed is not None and dropped is not None:
                overlap = set(executed).intersection(dropped)
                if overlap:
                    raise ValueError(f"executed_product_ids and dropped_product_ids overlap: {sorted(overlap)}")

            # selection_id may be present on a selection row but is not demanded.
            return self

        if self.event_type == "evaluation":
            if self.product_id is None:
                raise ValueError("evaluation event requires product_id.")
            if self.product_display is None:
                raise ValueError("evaluation event requires product_display.")

            # The selection payload has to stay unset on evaluation rows.
            if not (self.choice_set is None and self.selected_product_ids is None):
                raise ValueError("evaluation event must not include choice_set/selected_product_ids.")

            # Under self_selection every evaluation must point at its selection row.
            if self.selection_id is None and self.policy == "self_selection":
                raise ValueError("self_selection evaluation requires selection_id.")
            return self

        # Fallback for any other event_type value; the original marks this as unreachable.
        return self
# Column specifications for the event table, one entry per EventV0_1_0 field
# and listed in the same order as the fields are declared on the model.
# Each entry gives the column name, a dtype label ("string", "int" or "json"),
# a required flag and a human-readable description.
COLUMNS: List[ColumnSpec] = [
{"name": "schema_version", "dtype": "string", "required": True, "description": "Schema version."},
{"name": "event_id", "dtype": "string", "required": True, "description": "Deterministic unique event id."},
{"name": "event_type", "dtype": "string", "required": True, "description": "Event type: selection|evaluation."},
{"name": "policy", "dtype": "string", "required": True, "description": "Policy: random|manual|self_selection."},
{"name": "panelist_id", "dtype": "string", "required": True, "description": "Panelist identifier."},
{"name": "t", "dtype": "int", "required": True, "description": "Period index (0-based)."},
{"name": "selection_id", "dtype": "string", "required": False, "description": "Links evaluation rows to selection row."},
{"name": "choice_set", "dtype": "json", "required": False, "description": "Presented choice set (selection rows only)."},
{"name": "selected_product_ids", "dtype": "json", "required": False, "description": "Product ids requested by the panelist (selection rows only). May differ from executed ids in traces."},
{"name": "product_id", "dtype": "string", "required": False, "description": "Evaluated product id (evaluation rows only)."},
{"name": "product_display", "dtype": "string", "required": False, "description": "Displayed product text (evaluation rows only)."},
{"name": "panelist_features", "dtype": "json", "required": True, "description": "Panelist features JSON (may be {})."},
{"name": "product_features", "dtype": "json", "required": True, "description": "Product features JSON (may be {})."},
{"name": "outcomes", "dtype": "json", "required": False, "description": "Outcome(s) JSON (evaluation rows only)."},
{"name": "traces", "dtype": "json", "required": False, "description": "Trace(s) JSON."},
]
# Rebuild the model at import time. NOTE(review): presumably needed so that
# annotations deferred by `from __future__ import annotations` are resolved
# once all referenced types are importable — confirm.
EventV0_1_0.model_rebuild()