Source code for sim_panel.analysis.compare.types

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Literal


[docs] @dataclass(frozen=True) class ConditionSpec: label: str model: str strategy: str run_dir: str condition_type: str = "synthetic" events_filename: str = "events.jsonl" @property def is_real(self) -> bool: return self.condition_type == "real"
[docs] @dataclass(frozen=True) class CompareConfig: output_dir: str outcome_field: str # e.g. "rating" conditions: List[ConditionSpec] rating_scale: Optional[List[int]] = None # e.g. [1..10]; inferred if None benchmark_top_k_products: int = 20
[docs] @dataclass class ConditionMetrics: label: str model: str strategy: str n_evaluations: int = 0 n_with_outcome: int = 0 rating_mean: Optional[float] = None rating_std: Optional[float] = None rating_median: Optional[float] = None # Persona consistency: do different personas give different ratings? panelist_mean_variance: Optional[float] = None mean_pairwise_panelist_distance: Optional[float] = None # Product differentiation: do different products get different ratings? product_mean_variance: Optional[float] = None # Distribution shape rating_entropy: Optional[float] = None rating_normalized_entropy: Optional[float] = None # Raw distribution for cross-condition comparisons rating_distribution: Dict[Any, int] = field(default_factory=dict) # All numeric values for pairwise computations _values: List[float] = field(default_factory=list, repr=False)
[docs] @dataclass(frozen=True) class CompareMode: kind: Literal["cross", "benchmark"] reference_label: Optional[str] = None