Source code for sim_panel.outcomes.llm
from __future__ import annotations
from typing import Any, Dict, Optional
from sim_panel.outcomes.base import EvaluationContext, OutcomeConfig, OutcomeResult
from sim_panel.outcomes.parsing import extract_json_object, safe_excerpt
from sim_panel.outcomes.render import render_evaluation_prompt
[docs]
class LLMOutcomeModel:
    """
    Outcome model whose answers come from an LLM reached through a panelist.

    ``evaluate`` performs four steps:
    - builds the evaluation prompt with ``render_evaluation_prompt``
    - sends it through ``panelist.evaluate(...)``
    - pulls a JSON object out of the reply with ``extract_json_object``
    - checks that object with ``cfg.questionnaire.validate_payload``
    """

    def __init__(self, cfg: OutcomeConfig) -> None:
        # ``evaluate`` reads questionnaire, temperature, max_tokens and
        # include_raw_text from this configuration object.
        self.cfg = cfg

    def _raw_text_for_result(self, raw):
        """Return ``raw`` when ``cfg.include_raw_text`` is truthy, otherwise ``None``."""
        if self.cfg.include_raw_text:
            return raw
        return None

    def evaluate(self, *, panelist, ctx: EvaluationContext, prompting_strategy: str = "persona") -> OutcomeResult:
        """
        Ask ``panelist`` to answer the configured questionnaire for ``ctx``.

        Args:
            panelist: object exposing ``evaluate(task_prompt=..., temperature=...,
                max_tokens=..., metadata=..., system_prompt=...)``; its return
                value is handed to ``extract_json_object``.
            ctx: evaluation context; ``panelist_id``, ``product_id`` and ``t``
                are copied into the call metadata.
            prompting_strategy: forwarded to the prompt renderer. For
                ``"zero_shot"`` and ``"few_shot"`` a fixed generic system
                prompt is sent; for anything else ``None`` is sent.

        Returns:
            An ``OutcomeResult``. ``outcomes`` is ``None`` when JSON extraction
            or questionnaire validation fails; in those cases ``traces``
            carries an excerpt of the raw reply for debugging.
        """
        config = self.cfg

        task_prompt = render_evaluation_prompt(
            ctx=ctx,
            questionnaire=config.questionnaire,
            include_features=True,
            outcome_cfg=config,
            prompting_strategy=prompting_strategy,
        )

        # None leaves the choice of system prompt to the panelist
        # (presumably its persona text - confirm against the panelist class).
        system_prompt = (
            "You are evaluating consumer products. Provide honest, thoughtful responses."
            if prompting_strategy in ("zero_shot", "few_shot")
            else None
        )

        # The panelist owns the backend call; this model only shapes the request.
        raw = panelist.evaluate(
            task_prompt=task_prompt,
            temperature=config.temperature,
            max_tokens=config.max_tokens,
            metadata={
                "module": "outcomes.llm",
                "panelist_id": ctx.panelist_id,
                "product_id": ctx.product_id,
                "t": ctx.t,
            },
            system_prompt=system_prompt,
        )

        payload, parse_error = extract_json_object(raw)

        # --- Failure path 1: no usable JSON object in the reply -------------
        if not (parse_error is None and payload is not None):
            parse_traces = {
                "parse_error": parse_error,
                "raw_excerpt": safe_excerpt(raw),
            }
            if parse_error:
                reported = [parse_error]
            else:
                reported = ["unknown parse error"]
            return OutcomeResult(
                outcomes=None,
                traces=parse_traces,
                raw_text=self._raw_text_for_result(raw),
                errors=reported,
            )

        outcomes, validator_traces, messages = config.questionnaire.validate_payload(payload)

        # --- Failure path 2: payload rejected by the questionnaire ----------
        if outcomes is None:
            # Work on a copy and never overwrite keys the validator already set.
            diagnostics = dict(validator_traces or {})
            if "validation_errors" not in diagnostics:
                diagnostics["validation_errors"] = messages
            excerpt = safe_excerpt(raw)
            if "raw_excerpt" not in diagnostics:
                diagnostics["raw_excerpt"] = excerpt
            return OutcomeResult(
                outcomes=None,
                traces=diagnostics,
                raw_text=self._raw_text_for_result(raw),
                errors=messages,
            )

        # --- Success path ----------------------------------------------------
        result_traces = {} if validator_traces is None else validator_traces

        # Messages prefixed with "[warn]" are non-fatal; surface them in traces.
        warnings_found = []
        for message in messages or []:
            if message.startswith("[warn]"):
                warnings_found.append(message)

        if warnings_found:
            # Copy before adding the key so the validator's dict is untouched.
            result_traces = dict(result_traces or {})
            result_traces["warnings"] = warnings_found

        return OutcomeResult(
            outcomes=outcomes,
            traces=result_traces,
            raw_text=self._raw_text_for_result(raw),
            errors=messages,
        )