Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -523,7 +523,7 @@ The computation question MAY NOT:
|
|
523 |
}
|
524 |
return problem_type_additions.get(question_type, "")
|
525 |
|
526 |
-
def get_solution_for_verification(response_text,
|
527 |
"""
|
528 |
Extract the relevant parts of the solution for verification based on whether
|
529 |
the original solution was correct or not. Always preserves the original question.
|
@@ -673,14 +673,14 @@ Format your response with clear headers and bullet points."""
|
|
673 |
logger.error(f"Error in ChatGPT verification: {str(e)}")
|
674 |
return f"Error in ChatGPT verification: {str(e)}"
|
675 |
|
676 |
-
def append_chatgpt_verification(initial_response,
|
677 |
"""
|
678 |
Main function to handle the ChatGPT verification process.
|
679 |
Returns the original response with the ChatGPT verification appended.
|
680 |
"""
|
681 |
try:
|
682 |
# Get the appropriate solution text for verification
|
683 |
-
question, solution_text = get_solution_for_verification(initial_response,
|
684 |
|
685 |
# Get ChatGPT's verification
|
686 |
chatgpt_verification = verify_with_chatgpt(question, solution_text)
|
@@ -894,25 +894,32 @@ make sure the question is CLEAR about what regions you intend to be included in
|
|
894 |
# Execute SymPy code and append results
|
895 |
sympy_output = extract_and_run_sympy_code_simple(response_text)
|
896 |
has_discrepancy = False # Initialize outside the if block
|
897 |
-
|
898 |
revised_solution = None
|
899 |
final_verification = None
|
900 |
|
901 |
-
|
902 |
-
|
903 |
-
|
904 |
-
|
905 |
-
|
906 |
-
|
907 |
-
|
908 |
-
|
909 |
-
|
910 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
911 |
# add the ChatGPT verification
|
912 |
if include_chatgpt == "yes":
|
913 |
response_text = append_chatgpt_verification(
|
914 |
response_text,
|
915 |
-
|
916 |
final_verification if has_discrepancy else None
|
917 |
)
|
918 |
|
@@ -994,7 +1001,7 @@ def check_and_resolve_discrepancy(initial_response, sympy_output):
|
|
994 |
has_discrepancy = False #Initialize
|
995 |
resolution_text = ""
|
996 |
revised_solution = None
|
997 |
-
|
998 |
|
999 |
try:
|
1000 |
resolution_prompt = f"""Here is a mathematics question with two answers.
|
@@ -1015,12 +1022,12 @@ def check_and_resolve_discrepancy(initial_response, sympy_output):
|
|
1015 |
4. After your analysis, conclude ONE of the following:
|
1016 |
|
1017 |
If equivalence is PROVEN:
|
1018 |
-
- Write "
|
1019 |
- Explain exactly how you proved equivalence
|
1020 |
- Show all steps of the verification
|
1021 |
|
1022 |
If equivalence CANNOT be proven:
|
1023 |
-
- Write "
|
1024 |
- Explain why equivalence cannot be established
|
1025 |
- Write "Here is the revised complete solution:" and then write out an ENTIRE corrected solution from beginning
|
1026 |
to end, including all parts that were correct and the corrections for any incorrect parts.
|
@@ -1031,10 +1038,10 @@ If the two answers are inconsistent with each other then please:
|
|
1031 |
1. Identify which solution is correct
|
1032 |
2. Explain the error in the incorrect solution
|
1033 |
3. Write "Here is the revised complete solution:"
|
1034 |
-
4. Start with "
|
1035 |
|
1036 |
If verification is INCONCLUSIVE:
|
1037 |
-
- Write "
|
1038 |
- Explain why equivalence cannot be determined
|
1039 |
- Request a new SymPy verification with additional checks
|
1040 |
|
@@ -1122,22 +1129,22 @@ Please maintain the same LaTeX formatting as the original solution."""
|
|
1122 |
logger.error(f"Error in solution recheck: {str(e)}")
|
1123 |
|
1124 |
# Parse whether SymPy was correct
|
1125 |
-
|
1126 |
-
if "
|
1127 |
-
|
1128 |
-
elif "
|
1129 |
-
|
1130 |
|
1131 |
-
return resolution_text, has_discrepancy, revised_solution,
|
1132 |
|
1133 |
except Exception as e:
|
1134 |
logger.error(f"Error in discrepancy resolution: {str(e)}")
|
1135 |
resolution_text = f"Error in resolution: {str(e)}"
|
1136 |
has_discrepancy = False # Explicitly set in error case
|
1137 |
revised_solution = None
|
1138 |
-
return resolution_text, has_discrepancy, revised_solution,
|
1139 |
|
1140 |
-
def perform_final_verification(revised_solution,
|
1141 |
"""
|
1142 |
Perform a final verification of the revised solution.
|
1143 |
"""
|
@@ -1165,7 +1172,7 @@ Please follow these steps exactly:
|
|
1165 |
- Any missing steps or assumptions
|
1166 |
- Any necessary additional proofs or derivations
|
1167 |
|
1168 |
-
4. The answer aligns with the {'SymPy' if
|
1169 |
|
1170 |
Your complete solution must:
|
1171 |
- Be completely self-contained
|
|
|
523 |
}
|
524 |
return problem_type_additions.get(question_type, "")
|
525 |
|
526 |
+
def get_solution_for_verification(response_text, SYMPY_CONFIRMED, final_verification=None):
|
527 |
"""
|
528 |
Extract the relevant parts of the solution for verification based on whether
|
529 |
the original solution was correct or not. Always preserves the original question.
|
|
|
673 |
logger.error(f"Error in ChatGPT verification: {str(e)}")
|
674 |
return f"Error in ChatGPT verification: {str(e)}"
|
675 |
|
676 |
+
def append_chatgpt_verification(initial_response, SYMPY_CONFIRMED, final_verification=None):
|
677 |
"""
|
678 |
Main function to handle the ChatGPT verification process.
|
679 |
Returns the original response with the ChatGPT verification appended.
|
680 |
"""
|
681 |
try:
|
682 |
# Get the appropriate solution text for verification
|
683 |
+
question, solution_text = get_solution_for_verification(initial_response, SYMPY_CONFIRMED, final_verification)
|
684 |
|
685 |
# Get ChatGPT's verification
|
686 |
chatgpt_verification = verify_with_chatgpt(question, solution_text)
|
|
|
894 |
# Execute SymPy code and append results
|
895 |
sympy_output = extract_and_run_sympy_code_simple(response_text)
|
896 |
has_discrepancy = False # Initialize outside the if block
|
897 |
+
SYMPY_CONFIRMED = None
|
898 |
revised_solution = None
|
899 |
final_verification = None
|
900 |
|
901 |
+
# Store original response text before modifications
|
902 |
+
original_response = response_text
|
903 |
+
|
904 |
+
# Always do verification analysis
|
905 |
+
if "Error" in sympy_output:
|
906 |
+
verification_text = "SymPy verification failed with error. Manual solution must be verified independently."
|
907 |
+
SYMPY_CONFIRMED = "Inconclusive"
|
908 |
+
response_text = f"{response_text}\n\nSymPy Verification Results:\n```\n{sympy_output}\n```\n\nVerification Analysis:\n{verification_text}"
|
909 |
+
else:
|
910 |
+
resolution_text, has_discrepancy, revised_solution, SYMPY_CONFIRMED = check_and_resolve_discrepancy(original_response, sympy_output)
|
911 |
+
response_text = f"{response_text}\n\nSymPy Verification Results:\n```\n{sympy_output}\n```\n\nVerification Analysis:\n{resolution_text}"
|
912 |
+
|
913 |
+
if has_discrepancy and revised_solution:
|
914 |
+
logger.debug("Performing final verification for problem with discrepancy")
|
915 |
+
final_verification = perform_final_verification(revised_solution, SYMPY_CONFIRMED)
|
916 |
+
response_text += "\n\nFinal Expert Verification:\n" + final_verification
|
917 |
+
|
918 |
# add the ChatGPT verification
|
919 |
if include_chatgpt == "yes":
|
920 |
response_text = append_chatgpt_verification(
|
921 |
response_text,
|
922 |
+
SYMPY_CONFIRMED,
|
923 |
final_verification if has_discrepancy else None
|
924 |
)
|
925 |
|
|
|
1001 |
has_discrepancy = False #Initialize
|
1002 |
resolution_text = ""
|
1003 |
revised_solution = None
|
1004 |
+
SYMPY_CONFIRMED = None # Initialize at the start
|
1005 |
|
1006 |
try:
|
1007 |
resolution_prompt = f"""Here is a mathematics question with two answers.
|
|
|
1022 |
4. After your analysis, conclude ONE of the following:
|
1023 |
|
1024 |
If equivalence is PROVEN:
|
1025 |
+
- Write "SYMPY_CONFIRMED: True" on its own line (this means SymPy's output CONFIRMS the original solution)
|
1026 |
- Explain exactly how you proved equivalence
|
1027 |
- Show all steps of the verification
|
1028 |
|
1029 |
If equivalence CANNOT be proven:
|
1030 |
+
- Write "SYMPY_CONFIRMED: False" on its own line
|
1031 |
- Explain why equivalence cannot be established
|
1032 |
- Write "Here is the revised complete solution:" and then write out an ENTIRE corrected solution from beginning
|
1033 |
to end, including all parts that were correct and the corrections for any incorrect parts.
|
|
|
1038 |
1. Identify which solution is correct
|
1039 |
2. Explain the error in the incorrect solution
|
1040 |
3. Write "Here is the revised complete solution:"
|
1041 |
+
4. Start with "SYMPY_CONFIRMED: False" on its own line.
|
1042 |
|
1043 |
If verification is INCONCLUSIVE:
|
1044 |
+
- Write "SYMPY_CONFIRMED: Inconclusive" on its own line
|
1045 |
- Explain why equivalence cannot be determined
|
1046 |
- Request a new SymPy verification with additional checks
|
1047 |
|
|
|
1129 |
logger.error(f"Error in solution recheck: {str(e)}")
|
1130 |
|
1131 |
# Parse whether SymPy was correct
|
1132 |
+
SYMPY_CONFIRMED = None
|
1133 |
+
if "SYMPY_CONFIRMED: True" in resolution_text:
|
1134 |
+
SYMPY_CONFIRMED = True
|
1135 |
+
elif "SYMPY_CONFIRMED: False" in resolution_text:
|
1136 |
+
SYMPY_CONFIRMED = False
|
1137 |
|
1138 |
+
return resolution_text, has_discrepancy, revised_solution, SYMPY_CONFIRMED
|
1139 |
|
1140 |
except Exception as e:
|
1141 |
logger.error(f"Error in discrepancy resolution: {str(e)}")
|
1142 |
resolution_text = f"Error in resolution: {str(e)}"
|
1143 |
has_discrepancy = False # Explicitly set in error case
|
1144 |
revised_solution = None
|
1145 |
+
return resolution_text, has_discrepancy, revised_solution, SYMPY_CONFIRMED
|
1146 |
|
1147 |
+
def perform_final_verification(revised_solution, SYMPY_CONFIRMED):
|
1148 |
"""
|
1149 |
Perform a final verification of the revised solution.
|
1150 |
"""
|
|
|
1172 |
- Any missing steps or assumptions
|
1173 |
- Any necessary additional proofs or derivations
|
1174 |
|
1175 |
+
4. The answer aligns with the {'SymPy' if SYMPY_CONFIRMED else 'original'} answer proven correct
|
1176 |
|
1177 |
Your complete solution must:
|
1178 |
- Be completely self-contained
|