Spaces:
Sleeping
Sleeping
Parse judgments with structured output prompting, one response model, one judge model at a time.
eb4ec23
from pydantic import BaseModel, Field, conint | |
from typing import List, Optional, Literal, Union | |
# One scoring criterion: a named, described dimension plus its score bounds.
# Documented with `#` comments on purpose: pydantic copies a class docstring
# into the generated JSON schema as `description`, which would alter the schema.
class Criteria(BaseModel):
    # Identifier and free-text explanation of the criterion.
    name: str
    description: str

    # Score bounds; each is validated independently as an integer >= 0.
    # NOTE(review): nothing here enforces min_score <= max_score -- confirm
    # whether consumers rely on that ordering.
    min_score: conint(ge=0)
    max_score: conint(ge=0)
# Assessment variant tagged "direct_assessment": a list of criteria plus a prompt.
# Documented with `#` comments on purpose: pydantic copies a class docstring
# into the generated JSON schema as `description`, which would alter the schema.
class DirectAssessment(BaseModel):
    # Fixed tag; the only accepted value is the literal "direct_assessment".
    type: Literal["direct_assessment"]

    # Criteria entries (name, description, score bounds) for this assessment.
    criteria: List[Criteria]

    # Prompt text; presumably the judge prompt -- confirm against the caller.
    prompt: str
# Assessment variant tagged "pairwise_comparison".
# Documented with `#` comments on purpose: pydantic copies a class docstring
# into the generated JSON schema as `description`, which would alter the schema.
class PairwiseComparison(BaseModel):
    # Fixed tag; the only accepted value is the literal "pairwise_comparison".
    type: Literal["pairwise_comparison"]

    # Must be exactly one of these three strings (note the space in "super fine").
    granularity: Literal["coarse", "fine", "super fine"]

    # Boolean switches. Presumably: whether a tie is a valid verdict, and
    # whether each pair is also judged in swapped order -- confirm with the
    # code that consumes this config.
    ties_allowed: bool
    position_swapping: bool

    # Name of a model; presumably the baseline other responses are compared
    # against -- TODO confirm.
    reference_model: str

    # Prompt text; presumably the judge prompt -- confirm against the caller.
    prompt: str
# Top-level judging configuration holding exactly one assessment definition.
# Documented with `#` comments on purpose: pydantic copies a class docstring
# into the generated JSON schema as `description`, which would alter the schema.
class JudgingConfig(BaseModel):
    # Either a DirectAssessment or a PairwiseComparison. This is a plain Union
    # (no explicit discriminator); the two members require different literal
    # `type` values, so a given input can validate as at most one of them.
    assessment: Union[DirectAssessment, PairwiseComparison]
# Score record for a single criterion: which criterion, the score, and why.
# Documented with `#` comments on purpose: pydantic copies a class docstring
# into the generated JSON schema as `description`, which would alter the schema.
class DirectAssessmentCriterionScore(BaseModel):
    # Criterion reference as a plain string; presumably matches Criteria.name
    # -- verify against the caller.
    criterion: str

    # Unconstrained integer: this model does not check the value against any
    # criterion's min_score / max_score.
    score: int

    # Free-text justification accompanying the score.
    explanation: str
# Container for a list of per-criterion score records.
# Documented with `#` comments on purpose: pydantic copies a class docstring
# into the generated JSON schema as `description`, which would alter the schema.
class DirectAssessmentCriteriaScores(BaseModel):
    # Disabled field, kept from the original for reference:
    # model: str

    # One DirectAssessmentCriterionScore entry per scored criterion.
    criteria_scores: List[DirectAssessmentCriterionScore]
# class DirectAssessmentJudgingResponse(BaseModel): | |
# judging_models: List[DirectAssessmentCriteriaScores] | |