Source code for sim_panel.outcomes.specs

from __future__ import annotations

from dataclasses import dataclass
from typing import Any, Dict, List, Literal, Mapping, Optional, Tuple


# Value types a questionnaire field may declare; FieldSpec.validate_value
# branches on exactly these names.
FieldType = Literal["int", "float", "categorical", "bool", "text", "json"]
# Analysis-facing semantic types accepted for a field's `analysis_type`.
AnalysisType = Literal["continuous", "binary", "nominal", "ordinal"]


[docs]
@dataclass(frozen=True)
class FieldSpec:
    """
    A single questionnaire field. Used for both outcomes and traces.

    name:
      The JSON key that must appear in the model output under outcomes/traces.

    type:
      Controls prompt rendering and validation.

    question:
      The user-facing question / instruction to the panelist.

    instruction:
      Optional additional formatting guidance.

    choices:
      Optional list of allowed values (strongly recommended for categorical / int with discrete choices).

    required:
      If False, missing key is allowed. If True, missing is a validation error.

    analysis_type:
      Optional analysis-facing semantic type. This does not affect prompt rendering
      or payload validation used by existing outcome modules. It is intended for
      downstream analysis modules such as regression.

      Allowed values:
      - "continuous"
      - "binary"
      - "nominal"
      - "ordinal"

    choice_order:
      Optional explicit ordering for ordinal outcomes. If omitted, downstream
      analysis may fall back to `choices` if appropriate.

    min_value/max_value:
      Optional numeric constraints for int/float.
    """
    name: str
    type: FieldType
    question: str
    instruction: Optional[str] = None
    choices: Optional[List[Any]] = None
    required: bool = True
    min_value: Optional[float] = None
    max_value: Optional[float] = None
    analysis_type: Optional[AnalysisType] = None
    choice_order: Optional[Tuple[Any, ...]] = None

    @staticmethod
    def _bound_label(bound: float) -> str:
        """Render a bound for an int-field message: integral bounds print as ints."""
        if isinstance(bound, int):
            return str(bound)
        as_float = float(bound)
        # inf/nan are not integral, so they are printed as floats instead of
        # being pushed through int(), which would raise.
        return str(int(as_float)) if as_float.is_integer() else str(as_float)

    def validate_value(self, v: Any) -> Optional[str]:
        """
        Check a single parsed value against this field's declaration.

        Returns None when the value is acceptable, otherwise a human-readable
        error message. A None value is accepted only when the field is not
        required. When `choices` is set, the value must additionally be a
        member of it (except for scalar values of a "json" field, which are
        accepted as-is).
        """
        if v is None:
            return None if not self.required else f"Field '{self.name}' is required but null."
        t = self.type

        if t == "int":
            # bool is a subclass of int; a JSON true/false is not an integer answer.
            if isinstance(v, bool) or not isinstance(v, int):
                return f"Field '{self.name}' expects int, got {type(v).__name__}."
            # Compare against the bound itself: truncating it with int() would
            # let e.g. 1 pass a minimum of 1.5.
            if self.min_value is not None and v < self.min_value:
                return f"Field '{self.name}' must be >= {self._bound_label(self.min_value)}, got {v}."
            if self.max_value is not None and v > self.max_value:
                return f"Field '{self.name}' must be <= {self._bound_label(self.max_value)}, got {v}."
        elif t == "float":
            # Integers are fine for a float field, booleans are not.
            if isinstance(v, bool) or not isinstance(v, (int, float)):
                return f"Field '{self.name}' expects float, got {type(v).__name__}."
            fv = float(v)
            if self.min_value is not None and fv < float(self.min_value):
                return f"Field '{self.name}' must be >= {float(self.min_value)}, got {fv}."
            if self.max_value is not None and fv > float(self.max_value):
                return f"Field '{self.name}' must be <= {float(self.max_value)}, got {fv}."
        elif t == "bool":
            if not isinstance(v, bool):
                return f"Field '{self.name}' expects bool, got {type(v).__name__}."
        elif t == "categorical":
            # Membership itself is checked by the shared `choices` test below.
            if self.choices is None or len(self.choices) == 0:
                return f"Field '{self.name}' is categorical but has no choices."
        elif t == "text":
            if not isinstance(v, str):
                return f"Field '{self.name}' expects text (string), got {type(v).__name__}."
        elif t == "json":
            # Any JSON-serializable object is acceptable; we don't deeply validate here.
            if isinstance(v, (str, int, float, bool)):
                # still valid JSON, but likely not intended; allow it.
                return None
            if not isinstance(v, (dict, list)):
                return f"Field '{self.name}' expects json (dict/list), got {type(v).__name__}."
        else:
            return f"Unknown field type '{t}' for field '{self.name}'."

        if self.choices is not None:
            if v not in self.choices:
                return f"Field '{self.name}' must be one of {self.choices}, got {v!r}."
        return None





[docs]
@dataclass(frozen=True)
class QuestionnaireSpec:
    """
    A questionnaire defines what the panelist must fill out after evaluation.

    - outcome_fields define the `event["outcomes"]` object.
    - trace_fields define the `event["traces"]` object (optional / free-form-ish).

    The model output is expected to be JSON:
      {"outcomes": {...}, "traces": {...}}
    """
    outcome_fields: Tuple[FieldSpec, ...]
    trace_fields: Tuple[FieldSpec, ...] = ()

    def outcome_names(self) -> List[str]:
        """Return the names of all outcome fields, in declaration order."""
        return [f.name for f in self.outcome_fields]

    def trace_names(self) -> List[str]:
        """Return the names of all trace fields, in declaration order."""
        return [f.name for f in self.trace_fields]

    @staticmethod
    def _collect_section(
        fields: Tuple[FieldSpec, ...],
        section_obj: Dict[str, Any],
        label: str,
        errors: List[str],
    ) -> Dict[str, Any]:
        """Validate one section's fields; return accepted values, append problems to `errors`."""
        accepted: Dict[str, Any] = {}
        for fs in fields:
            if fs.name not in section_obj:
                # An absent key is only a problem for required fields.
                if fs.required:
                    errors.append(f"Missing required {label} field '{fs.name}'.")
                continue
            value = section_obj[fs.name]
            problem = fs.validate_value(value)
            if problem:
                errors.append(problem)
            else:
                accepted[fs.name] = value
        return accepted

    def validate_payload(self, payload: Dict[str, Any]) -> Tuple[Optional[Dict[str, Any]], Optional[Dict[str, Any]], List[str]]:
        """
        Validate a parsed JSON payload against the questionnaire spec.
        Returns (outcomes, traces, errors).

        `errors` mixes hard errors with warnings; warnings are prefixed with
        "[warn]". If any hard error is present, outcomes and traces are both
        None. Otherwise they are dicts holding only the spec'd fields that
        were present and valid (traces is an empty dict when nothing applies).
        """
        # Parsed JSON may legally be a list or scalar; report it as a
        # validation error rather than failing on `.get`.
        if not isinstance(payload, dict):
            return None, None, [
                f"Top-level payload must be an object/dict, got {type(payload).__name__}."
            ]

        errors: List[str] = []
        outcomes_obj = payload.get("outcomes")
        traces_obj = payload.get("traces")

        if outcomes_obj is None:
            errors.append("Missing top-level key 'outcomes'.")
            outcomes_obj = {}
        if not isinstance(outcomes_obj, dict):
            errors.append(f"Top-level 'outcomes' must be an object/dict, got {type(outcomes_obj).__name__}.")
            outcomes_obj = {}

        # Unlike outcomes, a missing/null traces object is not an error.
        if traces_obj is None:
            traces_obj = {}
        if not isinstance(traces_obj, dict):
            errors.append(f"Top-level 'traces' must be an object/dict, got {type(traces_obj).__name__}.")
            traces_obj = {}

        outcomes = self._collect_section(self.outcome_fields, outcomes_obj, "outcome", errors)
        traces = self._collect_section(self.trace_fields, traces_obj, "trace", errors)

        # Extra keys are allowed, but we surface them as warnings inside errors (prefixed).
        extra_outcomes = set(outcomes_obj.keys()) - set(self.outcome_names())
        extra_traces = set(traces_obj.keys()) - set(self.trace_names())
        if extra_outcomes:
            errors.append(f"[warn] Extra outcome keys not in spec: {sorted(extra_outcomes)}")
        if extra_traces:
            errors.append(f"[warn] Extra trace keys not in spec: {sorted(extra_traces)}")

        # If there are hard errors (non-warn), treat as invalid
        if any(not e.startswith("[warn]") for e in errors):
            return None, None, errors

        # With no trace fields in the spec, `traces` is simply the empty dict.
        return outcomes, traces, errors

    @staticmethod
    def from_config_dict(cfg: Mapping[str, Any]) -> "QuestionnaireSpec":
        """
        Build QuestionnaireSpec from a YAML-parsed dict.

        Expected shape:
          outcomes:
            fields:
              <name>:
                type: ...
                question: ...
                instruction: ...
                choices: [...]
                required: true/false
                min: ...
                max: ...
                analysis_type: ...
                choice_order: [...]
          traces:
            fields:
              <name>: ...

        A section that is absent or null is treated as empty. Raises
        ValueError if a section is present but not a mapping, if any field
        definition is invalid, or if no outcome field is defined.
        """
        sections: Dict[str, Mapping[str, Any]] = {}
        for key in ("outcomes", "traces"):
            section = cfg.get(key) if isinstance(cfg, Mapping) else None
            if section is None:
                # `outcomes:` / `traces:` with no body parses to None in YAML.
                section = {}
            elif not isinstance(section, Mapping):
                raise ValueError(f"{key} must be a mapping, got {type(section).__name__}.")
            sections[key] = section

        out_fields = _parse_fields(sections["outcomes"].get("fields", {}), section="outcomes")
        tr_fields = _parse_fields(sections["traces"].get("fields", {}), section="traces")

        if len(out_fields) == 0:
            raise ValueError("QuestionnaireSpec requires at least one outcomes.fields entry.")
        return QuestionnaireSpec(outcome_fields=tuple(out_fields), trace_fields=tuple(tr_fields))




def _parse_fields(fields_cfg: Any, section: str) -> List[FieldSpec]:
    if fields_cfg is None:
        return []
    if not isinstance(fields_cfg, Mapping):
        raise ValueError(f"{section}.fields must be a mapping, got {type(fields_cfg).__name__}.")

    out: List[FieldSpec] = []
    for name, raw in fields_cfg.items():
        if not isinstance(name, str) or not name:
            raise ValueError(f"{section}.fields has invalid field name: {name!r}")

        if not isinstance(raw, Mapping):
            raise ValueError(f"{section}.fields['{name}'] must be a mapping, got {type(raw).__name__}.")

        ftype = raw.get("type")
        question = raw.get("question")
        if not isinstance(ftype, str) or not ftype:
            raise ValueError(f"{section}.fields['{name}'] missing/invalid 'type'.")
        if not isinstance(question, str) or not question:
            raise ValueError(f"{section}.fields['{name}'] missing/invalid 'question'.")

        instruction = raw.get("instruction")
        if instruction is not None and not isinstance(instruction, str):
            raise ValueError(f"{section}.fields['{name}'].instruction must be string if provided.")

        choices = raw.get("choices")
        if choices is not None and not isinstance(choices, list):
            raise ValueError(f"{section}.fields['{name}'].choices must be a list if provided.")

        required = raw.get("required", True)
        if not isinstance(required, bool):
            raise ValueError(f"{section}.fields['{name}'].required must be bool if provided.")

        minv = raw.get("min")
        maxv = raw.get("max")
        if minv is not None and not isinstance(minv, (int, float)):
            raise ValueError(f"{section}.fields['{name}'].min must be numeric if provided.")
        if maxv is not None and not isinstance(maxv, (int, float)):
            raise ValueError(f"{section}.fields['{name}'].max must be numeric if provided.")

        analysis_type = raw.get("analysis_type")
        if analysis_type is not None:
            if analysis_type not in {"continuous", "binary", "nominal", "ordinal"}:
                raise ValueError(
                    f"{section}.fields['{name}'].analysis_type must be one of "
                    "['continuous', 'binary', 'nominal', 'ordinal'] if provided."
                )

        choice_order = raw.get("choice_order")
        if choice_order is not None and not isinstance(choice_order, list):
            raise ValueError(
                f"{section}.fields['{name}'].choice_order must be a list if provided."
            )

        choice_order_tuple = tuple(choice_order) if choice_order is not None else None


        fs = FieldSpec(
            name=name,
            type=ftype,  # type: ignore[assignment]
            question=question,
            instruction=instruction,
            choices=choices,
            required=required,
            min_value=float(minv) if minv is not None else None,
            max_value=float(maxv) if maxv is not None else None,
            analysis_type=analysis_type,
            choice_order=choice_order_tuple,
        )

        # A few sanity checks
        if fs.type == "categorical" and (fs.choices is None or len(fs.choices) == 0):
            raise ValueError(f"{section}.fields['{name}'] categorical requires non-empty choices.")

        if fs.analysis_type == "binary" and fs.choices is not None and len(fs.choices) != 2:
            raise ValueError(
                f"{section}.fields['{name}'] analysis_type='binary' requires exactly 2 choices "
                f"when choices are provided, got {len(fs.choices)}."
            )

        if fs.choice_order is not None and fs.analysis_type != "ordinal":
            raise ValueError(
                f"{section}.fields['{name}'].choice_order is only valid when "
                "analysis_type='ordinal'."
            )

        if fs.analysis_type == "ordinal":
            if fs.choice_order is None and fs.choices is None:
                raise ValueError(
                    f"{section}.fields['{name}'] analysis_type='ordinal' requires "
                    "either choice_order or choices."
                )
            if fs.choice_order is not None and fs.choices is not None:
                if set(fs.choice_order) != set(fs.choices):
                    raise ValueError(
                        f"{section}.fields['{name}'].choice_order must contain the same "
                        "elements as choices."
                    )


        out.append(fs)

    return out