Source code for sim_panel.schema.versions.v0_1_0

from __future__ import annotations

from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field, ConfigDict, model_validator

from sim_panel.schema.types import JSONObject, PolicyName, ColumnSpec, EventType, JSONValue


# Version string of the schema defined in this module; it is the default value
# of the ``schema_version`` field on ``EventV0_1_0``.
SCHEMA_VERSION = "0.1.0"

def _unwrap_jsonvalue(x: Any) -> Any:
    """Strip every layer of ``JSONValue`` wrapping from ``x``.

    While the current object is a ``JSONValue`` it is replaced by its ``.root``
    attribute; the first object that is not a ``JSONValue`` is returned as-is.
    Containers are not descended into.
    """
    current = x
    while True:
        if not isinstance(current, JSONValue):
            return current
        current = current.root

def _unwrap_json_tree(x: Any) -> Any:
    """Return a copy of ``x`` with ``JSONValue`` wrappers removed at every depth.

    The top-level value is unwrapped first. Lists and dicts are then rebuilt
    with each element / value processed recursively (dict keys are kept
    unchanged). Anything else is returned after the top-level unwrap only.
    """
    plain = _unwrap_jsonvalue(x)
    if isinstance(plain, dict):
        return {key: _unwrap_json_tree(item) for key, item in plain.items()}
    if isinstance(plain, list):
        return [_unwrap_json_tree(item) for item in plain]
    return plain

def _get_optional_list_of_str(d: Dict[str, Any], key: str) -> Optional[List[str]]:
    value = d.get(key)
    if value is None:
        return None
    if not isinstance(value, list) or any(not isinstance(v, str) for v in value):
        raise ValueError(f"traces.{key} must be a list of strings if provided.")
    return value

class EventV0_1_0(BaseModel):
    """
    v0.1.0 supports two event types:

    1) selection (policy == "self_selection" only)
       - A choice set is presented at (panelist_id, t).
       - The panelist selects a subset (possibly empty).
       - This row records choice_set + selected_product_ids.

    2) evaluation (policy in {"random","manual","self_selection"})
       - A single (panelist_id, product_id, t) evaluation.
       - Records product_display plus flexible outcomes/traces.

    Note:
    - selection rows are only allowed when policy=="self_selection".
    - for self_selection evaluation rows, selection_id is required to link back.
    """

    # Unknown fields are rejected rather than silently ignored.
    model_config = ConfigDict(extra="forbid")

    # --- version and identity -------------------------------------------
    schema_version: str = Field(default=SCHEMA_VERSION)
    event_id: str = Field(..., description="Deterministic unique id for the event row.")

    # --- event type and policy ------------------------------------------
    event_type: EventType = Field(..., description="Event type: selection|evaluation.")
    policy: PolicyName = Field(..., description="Policy: random|manual|self_selection.")

    # --- panel / time keys ----------------------------------------------
    panelist_id: str
    t: int = Field(..., ge=0, description="Period index (0-based).")

    # --- linking: evaluation rows may point at the selection row --------
    selection_id: Optional[str] = Field(
        default=None,
        description="Deterministic id for the selection event (required for self_selection evaluation rows).",
    )

    # --- selection payload (selection rows only) ------------------------
    choice_set: Optional[List[str]] = Field(
        default=None,
        description="Choice set (list of product_ids) presented to the panelist (selection rows).",
    )
    selected_product_ids: Optional[List[str]] = Field(
        default=None,
        description=(
            "Product ids requested by the panelist (selection rows; may be empty). "
            "May differ from executed ids in traces."
        ),
    )

    # --- evaluation payload (evaluation rows only) ----------------------
    product_id: Optional[str] = Field(
        default=None,
        description="Evaluated product_id (required for evaluation rows).",
    )
    product_display: Optional[str] = Field(
        default=None,
        description="Panelist-facing display text (required for evaluation rows).",
    )

    # --- optional feature payloads (JSON; default to {}) ----------------
    panelist_features: JSONObject = Field(default_factory=dict)
    product_features: JSONObject = Field(default_factory=dict)

    # --- flexible outcomes / traces (scalar, panel or nested JSON) ------
    outcomes: Optional[JSONObject] = Field(
        default=None,
        description="Outcome(s) as JSON, e.g. {'rating': 4.2} or {'rating':4.2,'quality':3.8}.",
    )
    traces: Optional[JSONObject] = Field(
        default=None,
        description="Trace(s) as JSON, e.g. {'review_text':'...', 'rationale':'...'}",
    )

    @model_validator(mode="after")
    def _cross_field_rules(self) -> "EventV0_1_0":
        """Enforce the constraints that span several fields of one event row.

        Selection rows are only accepted under the ``self_selection`` policy;
        after that the per-event-type checks run. Any other ``event_type``
        value passes through unchecked.

        Returns:
            The validated instance itself.

        Raises:
            ValueError: On the first violated rule.
        """
        is_selection = self.event_type == "selection"
        if is_selection and self.policy != "self_selection":
            raise ValueError("event_type='selection' is only allowed when policy=='self_selection'.")

        if is_selection:
            self._check_selection_row()
        elif self.event_type == "evaluation":
            self._check_evaluation_row()
        return self

    def _check_selection_row(self) -> None:
        """Validate the payload of a selection row; raise ``ValueError`` on failure."""
        if self.choice_set is None:
            raise ValueError("selection event requires choice_set.")
        if self.selected_product_ids is None:
            raise ValueError("selection event requires selected_product_ids (can be empty list).")

        # Evaluation-only fields must all be absent on a selection row.
        evaluation_only = (self.product_id, self.product_display, self.outcomes)
        if not all(value is None for value in evaluation_only):
            raise ValueError("selection event must not include product_id/product_display/outcomes.")

        # Only the executed ids (from traces) are checked against the choice
        # set; the raw selected ids are merely required to be unique.
        offered = set(self.choice_set)
        if len(set(self.selected_product_ids)) != len(self.selected_product_ids):
            raise ValueError("selected_product_ids must not contain duplicates.")

        # Work on a plain-Python view of traces; a non-dict result is treated
        # as "no traces".
        trace_map = {} if self.traces is None else _unwrap_json_tree(self.traces)
        if not isinstance(trace_map, dict):
            trace_map = {}

        executed = _get_optional_list_of_str(trace_map, "executed_product_ids")
        dropped = _get_optional_list_of_str(trace_map, "dropped_product_ids")

        if executed is not None:
            if len(set(executed)) != len(executed):
                raise ValueError("traces.executed_product_ids must not contain duplicates.")
            for pid in executed:
                if pid not in offered:
                    raise ValueError(f"executed_product_ids contains {pid!r} not in choice_set.")

        if dropped is not None and len(set(dropped)) != len(dropped):
            raise ValueError("traces.dropped_product_ids must not contain duplicates.")

        if executed is not None and dropped is not None:
            shared = set(executed).intersection(dropped)
            if shared:
                raise ValueError(f"executed_product_ids and dropped_product_ids overlap: {sorted(shared)}")

        # selection_id is allowed but not required on selection rows.

    def _check_evaluation_row(self) -> None:
        """Validate the payload of an evaluation row; raise ``ValueError`` on failure."""
        if self.product_id is None:
            raise ValueError("evaluation event requires product_id.")
        if self.product_display is None:
            raise ValueError("evaluation event requires product_display.")

        # Selection-only payloads must be absent on an evaluation row.
        if not (self.choice_set is None and self.selected_product_ids is None):
            raise ValueError("evaluation event must not include choice_set/selected_product_ids.")

        # Option A: self_selection evaluation rows must link back to selection.
        if self.selection_id is None and self.policy == "self_selection":
            raise ValueError("self_selection evaluation requires selection_id.")
# Flat column description of an event row: one entry per EventV0_1_0 field,
# listed in the same order as the fields are declared on the model.
COLUMNS: List[ColumnSpec] = [
    # version and identity
    {
        "name": "schema_version",
        "dtype": "string",
        "required": True,
        "description": "Schema version.",
    },
    {
        "name": "event_id",
        "dtype": "string",
        "required": True,
        "description": "Deterministic unique event id.",
    },
    # event type and policy
    {
        "name": "event_type",
        "dtype": "string",
        "required": True,
        "description": "Event type: selection|evaluation.",
    },
    {
        "name": "policy",
        "dtype": "string",
        "required": True,
        "description": "Policy: random|manual|self_selection.",
    },
    # panel / time keys
    {
        "name": "panelist_id",
        "dtype": "string",
        "required": True,
        "description": "Panelist identifier.",
    },
    {
        "name": "t",
        "dtype": "int",
        "required": True,
        "description": "Period index (0-based).",
    },
    # linking
    {
        "name": "selection_id",
        "dtype": "string",
        "required": False,
        "description": "Links evaluation rows to selection row.",
    },
    # selection payload
    {
        "name": "choice_set",
        "dtype": "json",
        "required": False,
        "description": "Presented choice set (selection rows only).",
    },
    {
        "name": "selected_product_ids",
        "dtype": "json",
        "required": False,
        "description": "Product ids requested by the panelist (selection rows only). May differ from executed ids in traces.",
    },
    # evaluation payload
    {
        "name": "product_id",
        "dtype": "string",
        "required": False,
        "description": "Evaluated product id (evaluation rows only).",
    },
    {
        "name": "product_display",
        "dtype": "string",
        "required": False,
        "description": "Displayed product text (evaluation rows only).",
    },
    # feature payloads
    {
        "name": "panelist_features",
        "dtype": "json",
        "required": True,
        "description": "Panelist features JSON (may be {}).",
    },
    {
        "name": "product_features",
        "dtype": "json",
        "required": True,
        "description": "Product features JSON (may be {}).",
    },
    # outcomes / traces
    {
        "name": "outcomes",
        "dtype": "json",
        "required": False,
        "description": "Outcome(s) JSON (evaluation rows only).",
    },
    {
        "name": "traces",
        "dtype": "json",
        "required": False,
        "description": "Trace(s) JSON.",
    },
]

# Rebuild the model's schema once the whole module has been defined.
EventV0_1_0.model_rebuild()