# sandbox/judging_dataclasses.py
"""Parse judgments with structured output prompting, one response model and one judge model at a time."""
from typing import List, Literal, Union

from pydantic import BaseModel, Field, conint


class Criteria(BaseModel):
    """A single scoring criterion with an inclusive, non-negative score range."""

    name: str
    description: str
    min_score: conint(ge=0)
    max_score: conint(ge=0)


class DirectAssessment(BaseModel):
    """Judge each response on its own against a list of criteria."""

    type: Literal["direct_assessment"]
    criteria: List[Criteria]
    prompt: str


class PairwiseComparison(BaseModel):
    """Judge responses head-to-head against a reference model's response."""

    type: Literal["pairwise_comparison"]
    granularity: Literal["coarse", "fine", "super fine"]
    ties_allowed: bool
    position_swapping: bool
    reference_model: str
    prompt: str


class JudgingConfig(BaseModel):
    # The `type` literal on each variant discriminates the union during validation.
    assessment: Union[DirectAssessment, PairwiseComparison] = Field(discriminator="type")
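
# A hypothetical example payload for JudgingConfig (illustration only; the
# field values below are made up). The "type" key selects which Union member
# pydantic validates against:
# {
#     "assessment": {
#         "type": "pairwise_comparison",
#         "granularity": "coarse",
#         "ties_allowed": true,
#         "position_swapping": true,
#         "reference_model": "reference-model-name",
#         "prompt": "Which response better answers the user?"
#     }
# }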


class DirectAssessmentCriterionScore(BaseModel):
    """One judge-assigned score for a single criterion, with an explanation."""

    criterion: str
    score: int
    explanation: str


class DirectAssessmentCriteriaScores(BaseModel):
    """The full set of per-criterion scores returned by one judge model."""

    # model: str
    criteria_scores: List[DirectAssessmentCriterionScore]


# class DirectAssessmentJudgingResponse(BaseModel):
#     judging_models: List[DirectAssessmentCriteriaScores]
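

# Usage sketch (not part of the original module): a minimal, hypothetical
# example of validating a judge model's structured JSON output against the
# schemas above. It uses the pydantic v1-style `parse_obj` API, assuming the
# module targets pydantic v1 (as the `conint` usage suggests); under pydantic
# v2, `model_validate` would be the equivalent call.
if __name__ == "__main__":
    config = JudgingConfig.parse_obj(
        {
            "assessment": {
                "type": "direct_assessment",
                "criteria": [
                    {
                        "name": "helpfulness",
                        "description": "How well the response addresses the request.",
                        "min_score": 0,
                        "max_score": 5,
                    }
                ],
                "prompt": "Score the response on each criterion.",
            }
        }
    )

    # A judgment as it might come back from structured output prompting.
    scores = DirectAssessmentCriteriaScores.parse_obj(
        {
            "criteria_scores": [
                {
                    "criterion": "helpfulness",
                    "score": 4,
                    "explanation": "Addresses the request with minor omissions.",
                }
            ]
        }
    )
    print(config.assessment.type, [s.score for s in scores.criteria_scores])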