# Source code for sim_panel.sources.types
from __future__ import annotations
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Mapping, Optional, Sequence
from sim_panel.panelists.records import PersonaRecord
from sim_panel.products.records import ProductRecord
# Alias for a string-keyed dict with arbitrary values; used below to annotate
# the plain-dict payloads returned by the ``as_dict`` methods.
JsonDict = Dict[str, Any]
[docs]
@dataclass(slots=True)
class SourceConfig:
"""
Generic source-layer configuration.
Source-specific config classes may subclass this and add extra fields.
"""
name: str
output_dir: Optional[Path] = None
seed: int = 0
params: Dict[str, Any] = field(default_factory=dict)
[docs]
@dataclass(slots=True)
class SourceStats:
"""
Lightweight summary statistics for a source import run.
"""
n_raw_reviews: int = 0
n_raw_products: int = 0
n_events: int = 0
n_products: int = 0
n_personas: int = 0
n_reviews_missing_product_metadata: int = 0
extra: Dict[str, Any] = field(default_factory=dict)
[docs]
def as_dict(self) -> JsonDict:
return {
"n_raw_reviews": self.n_raw_reviews,
"n_raw_products": self.n_raw_products,
"n_events": self.n_events,
"n_products": self.n_products,
"n_personas": self.n_personas,
"n_reviews_missing_product_metadata": self.n_reviews_missing_product_metadata,
"extra": dict(self.extra),
}
[docs]
@dataclass(slots=True)
class SourceRawBundle:
"""
Raw source artifacts loaded from external files, before canonical projection.
"""
reviews: Sequence[Mapping[str, Any]] = field(default_factory=list)
products: Sequence[Mapping[str, Any]] = field(default_factory=list)
aux: Dict[str, Any] = field(default_factory=dict)
@dataclass(slots=True)
class SourceExportBundle:
    """
    Canonical export payload produced by a source importer.

    Events remain schema-valid row dicts.
    Products and personas are typed on-disk records.
    """

    events: List[JsonDict] = field(default_factory=list)
    products: List[ProductRecord] = field(default_factory=list)
    personas: List[PersonaRecord] = field(default_factory=list)
    metadata: JsonDict = field(default_factory=dict)
    data_dictionary: JsonDict = field(default_factory=dict)
    stats: SourceStats = field(default_factory=SourceStats)

    def is_empty(self) -> bool:
        """
        Return ``True`` when there are no events, products, or personas.

        ``metadata``, ``data_dictionary`` and ``stats`` are not considered.
        """
        return not (self.events or self.products or self.personas)

    def as_dict(self) -> JsonDict:
        """
        Return the bundle as a plain dict.

        Products and personas are converted through their ``to_dict()``
        methods and ``stats`` through ``SourceStats.as_dict()``. The
        ``events`` list and the ``metadata`` / ``data_dictionary`` mappings
        are shallow-copied, matching how ``SourceStats.as_dict`` treats
        ``extra``, so adding or removing top-level entries on the result does
        not mutate this bundle. Nested objects (e.g. the individual event
        rows) are still shared.
        """
        return {
            "events": list(self.events),
            "products": [product.to_dict() for product in self.products],
            "personas": [persona.to_dict() for persona in self.personas],
            "metadata": dict(self.metadata),
            "data_dictionary": dict(self.data_dictionary),
            "stats": self.stats.as_dict(),
        }