|
|
|
|
|
from utils.logger import log_output |
|
from evaluation.metrics import extract_final_answer |
|
|
|
def apply_self_consistency_prompt(question: str) -> str:
    """Build the step-by-step prompt used for self-consistency sampling.

    Args:
        question: Problem statement; inserted verbatim into the prompt.

    Returns:
        The prompt text: an instruction line, the question followed by a
        blank line, and a closing request to show reasoning and answer.
    """
    # Only the middle piece interpolates a value; the surrounding pieces are
    # plain literals, so they do not need an f-prefix.
    return (
        "Solve the following problem step by step with the format of LaTeX:\n"
        f"{question}\n\n"
        "Clearly show your reasoning and your answer.\n"
    )
|
|
|
def apply_self_consistency_answer(question: str, model, dataset: str, model_name: str, qid: int, n_iter=5, prompt_method="self_consistency") -> str:
    """Sample several completions and return the majority-vote answer.

    The same prompt is sent to ``model.generate`` ``n_iter`` times, a final
    answer is extracted from each completion, and the most frequent extracted
    answer wins. The question, the raw completions and the winning answer are
    passed to ``log_output``.

    Args:
        question: Problem statement to solve.
        model: Object exposing ``generate(prompt)``.
        dataset: Forwarded to ``log_output``.
        model_name: Forwarded to ``log_output``.
        qid: Forwarded to ``log_output``.
        n_iter: Number of completions to sample; must be at least 1.
        prompt_method: Forwarded to ``log_output``.

    Returns:
        The extracted answer that occurs most often. Ties go to the answer
        that was sampled first, so the result is reproducible for a given
        sequence of completions.

    Raises:
        ValueError: If ``n_iter`` is less than 1 (there is nothing to vote on).
    """
    # Function-scope import keeps this block self-contained.
    from collections import Counter

    if n_iter < 1:
        raise ValueError(f"n_iter must be at least 1, got {n_iter!r}")

    prompt = apply_self_consistency_prompt(question)
    answers = [model.generate(prompt) for _ in range(n_iter)]

    # NOTE(review): the first argument to extract_final_answer is passed as an
    # empty string, as in the original call; its meaning is defined in
    # evaluation.metrics -- confirm that "" is intended here.
    extracted = [extract_final_answer("", ans) for ans in answers]

    # Counter keeps first-seen order among equal counts, so ties no longer
    # depend on set iteration order (which varies with string hash seeding).
    most_common = Counter(extracted).most_common(1)[0][0]

    log_output(dataset, model_name, prompt_method, qid, {
        "question": question,
        "all_answers": answers,
        "final_answer": most_common,
    })

    return most_common
|
|
|
|
|
|