# prompts/self_refine.py
from utils.logger import log_output
from evaluation.metrics import extract_final_answer


def apply_initial_prompt(question: str) -> str:
    return (
        f"Solve the following problem step by step in LaTeX format:\n"
        f"{question}\n\n"
        "Clearly show your reasoning and your answer.\n"
    )


def apply_feedback_prompt(question: str, initial_answer: str) -> str:
    return (
        f"Question: {question}\n"
        f"Initial Answer:\n{initial_answer}\n\n"
        "There might be a mistake or a better way to solve this.\n"
        "Clearly show your reasoning and your answer.\n"
    )


def apply_self_refine_answer(question: str, model, dataset: str, model_name: str,
                             qid: int, prompt_method: str = "self_refine") -> str:
    # Pass 1: generate an initial step-by-step answer.
    first_prompt = apply_initial_prompt(question)
    first_answer = model.generate(first_prompt)
    initial_answer = extract_final_answer("", first_answer)

    # Pass 2: feed the initial answer back and ask the model to refine it.
    second_prompt = apply_feedback_prompt(question, initial_answer)
    refined_answer = model.generate(second_prompt)

    log_output(dataset, model_name, prompt_method, qid, {
        "question": question,
        "initial_answer": initial_answer,
        "refined_answer": refined_answer,
    })
    return refined_answer
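
# Minimal usage sketch (illustrative only): it assumes `model` is any object exposing a
# `generate(prompt: str) -> str` method, e.g. a thin wrapper around an LLM client.
# The `EchoModel` stand-in and the example question below are hypothetical, not part of
# the original module.
if __name__ == "__main__":
    class EchoModel:
        """Stand-in model that echoes the prompt; replace with a real LLM wrapper."""

        def generate(self, prompt: str) -> str:
            return f"[model output for prompt]\n{prompt}"

    answer = apply_self_refine_answer(
        question="What is 2 + 2?",
        model=EchoModel(),
        dataset="demo",
        model_name="echo-model",
        qid=0,
    )
    print(answer)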