{"cells":[{"cell_type":"code","execution_count":1,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{},"inputWidgets":{},"nuid":"0ea8b46b-839b-445b-8043-ccdf4e920ace","showTitle":false,"title":""},"id":"YLH80COBzi_F"},"outputs":[],"source":["# Reload edited modules automatically before each cell executes.\n","%load_ext autoreload\n","%autoreload 2"]},{"cell_type":"code","execution_count":2,"metadata":{"id":"63B5exAuzq4M"},"outputs":[],"source":["from pathlib import Path\n","\n","try:\n","    # On Colab, work from the project folder on the mounted Google Drive.\n","    from google.colab import drive\n","    drive.mount('/content/drive')\n","    workding_dir = \"/content/drive/MyDrive/logical-reasoning/\"\n","except ModuleNotFoundError:\n","    # Not on Colab: use the parent of the kernel's starting directory.\n","    # A later cell chdir()s into workding_dir, so recomputing it from cwd on a\n","    # re-run would climb one level each time; reuse the value already resolved.\n","    workding_dir = globals().get(\"workding_dir\") or str(Path.cwd().parent)"]},{"cell_type":"code","execution_count":3,"metadata":{"executionInfo":{"elapsed":368,"status":"ok","timestamp":1719461634865,"user":{"displayName":"Donghao Huang","userId":"00463591218503521679"},"user_tz":-480},"id":"zFulf0bg0H-9","outputId":"debdd535-c828-40b9-efc0-8a180e5830dd"},"outputs":[{"name":"stdout","output_type":"stream","text":["workding dir: /Users/inflaton/code/engd/projects/logical-reasoning\n"]}],"source":["import os\n","import sys\n","\n","os.chdir(workding_dir)\n","# Guard the append so re-running this cell does not add duplicate entries.\n","if workding_dir not in sys.path:\n","    sys.path.append(workding_dir)\n","print(\"workding dir:\", workding_dir)"]},{"cell_type":"code","execution_count":4,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{},"inputWidgets":{},"nuid":"9f67ec60-2f24-411c-84eb-0dd664b44775","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":589,"status":"ok","timestamp":1719462011879,"user":{"displayName":"Donghao Huang","userId":"00463591218503521679"},"user_tz":-480},"id":"DIUiweYYzi_I","outputId":"e16e9247-9077-4b0c-f8ea-17059f05a1c4"},"outputs":[{"name":"stdout","output_type":"stream","text":["loading env vars from: /Users/inflaton/code/engd/projects/logical-reasoning/.env\n"]},{"data":{"text/plain":["True"]},"execution_count":4,"metadata":{},"output_type":"execute_result"}],"source":["from dotenv import find_dotenv, 
load_dotenv\n","\n","# Prefer .env; fall back to .env.example when find_dotenv() returns an empty string.\n","found_dotenv = find_dotenv(\".env\")\n","\n","if len(found_dotenv) == 0:\n","    found_dotenv = find_dotenv(\".env.example\")\n","print(f\"loading env vars from: {found_dotenv}\")\n","# override=True: values from the file replace variables already in the environment.\n","load_dotenv(found_dotenv, override=True)"]},{"cell_type":"code","execution_count":5,"metadata":{"id":"W2QyVreqhOGM","outputId":"68b9590e-1ac6-4c6f-e0c4-e273ec816419"},"outputs":[{"data":{"text/html":["
\n"," | text | \n","label | \n","title | \n","puzzle | \n","truth | \n","meta-llama/Meta-Llama-3-8B-Instruct/checkpoint-117_torch.bfloat16_lf | \n","meta-llama/Meta-Llama-3-8B-Instruct/checkpoint-234_torch.bfloat16_lf | \n","meta-llama/Meta-Llama-3-8B-Instruct/checkpoint-351_torch.bfloat16_lf | \n","meta-llama/Meta-Llama-3-8B-Instruct_torch.bfloat16_lf | \n","
0 | \n","Was Zhen Zhesuo suicide? | \n","No | \n","The Mystery of the Coast | \n","In the quiet seaside cottage of a neighbor, a ... | \n","Zhen Zhesao was a nature-loving painter who ca... | \n","No | \n","No | \n","No | \n","I'll answer your question according to the rul... | \n","
1 | \n","Was Zhen Zhesuo sickly? | \n","Yes | \n","The Mystery of the Coast | \n","In the quiet seaside cottage of a neighbor, a ... | \n","Zhen Zhesao was a nature-loving painter who ca... | \n","Yes | \n","Yes | \n","Yes | \n","Answer: Yes | \n","
2 | \n","The painting was by Zhen. | \n","Yes | \n","The Mystery of the Coast | \n","In the quiet seaside cottage of a neighbor, a ... | \n","Zhen Zhesao was a nature-loving painter who ca... | \n","No | \n","Yes | \n","Yes | \n","Answer: Correct answer | \n","
3 | \n","Was Zhen with a heart condition? | \n","Yes | \n","The Mystery of the Coast | \n","In the quiet seaside cottage of a neighbor, a ... | \n","Zhen Zhesao was a nature-loving painter who ca... | \n","Yes | \n","Yes | \n","Yes | \n","I'll answer with one of the five options:\\n\\nC... | \n","
4 | \n","The wheel was the murderer's weapon. | \n","No | \n","The Mystery of the Coast | \n","In the quiet seaside cottage of a neighbor, a ... | \n","Zhen Zhesao was a nature-loving painter who ca... | \n","No | \n","No | \n","No | \n","I cannot answer that question. | \n","
... | \n","... | \n","... | \n","... | \n","... | \n","... | \n","... | \n","... | \n","... | \n","... | \n","
2995 | \n","Did the weeping person have to make a sacrific... | \n","Yes | \n","Zhen Zhuo's wails | \n","One night, in a quiet village, a weeping sound... | \n","It turned out that the old hat belonged to a l... | \n","No | \n","No | \n","Unimportant | \n","I'll answer your question according to the rul... | \n","
2996 | \n","Was the body in the lake? | \n","No | \n","Zhen Zhuo's wails | \n","One night, in a quiet village, a weeping sound... | \n","It turned out that the old hat belonged to a l... | \n","No | \n","No | \n","No | \n","Unimportant. | \n","
2997 | \n","Do mourners have a special relationship with t... | \n","Yes | \n","Zhen Zhuo's wails | \n","One night, in a quiet village, a weeping sound... | \n","It turned out that the old hat belonged to a l... | \n","Yes | \n","Yes | \n","Yes | \n","Answer: Yes | \n","
2998 | \n","Was the owner of the hat dead? | \n","No | \n","Zhen Zhuo's wails | \n","One night, in a quiet village, a weeping sound... | \n","It turned out that the old hat belonged to a l... | \n","No | \n","No | \n","No | \n","I can answer this question with one of the fiv... | \n","
2999 | \n","Was the dead person wounded? | \n","No | \n","Zhen Zhuo's wails | \n","One night, in a quiet village, a weeping sound... | \n","It turned out that the old hat belonged to a l... | \n","No | \n","No | \n","Unimportant | \n","I can answer your question according to the ru... | \n","
3000 rows × 9 columns
\n","\n"," | epoch | \n","model | \n","accuracy | \n","precision | \n","recall | \n","f1 | \n","
0 | \n","0.000000 | \n","meta-llama/Meta-Llama-3-8B-Instruct_torch.bflo... | \n","0.133333 | \n","0.543049 | \n","0.133333 | \n","0.178079 | \n","
1 | \n","0.333333 | \n","meta-llama/Meta-Llama-3-8B-Instruct/checkpoint... | \n","0.648667 | \n","0.652593 | \n","0.648667 | \n","0.631272 | \n","
2 | \n","0.666667 | \n","meta-llama/Meta-Llama-3-8B-Instruct/checkpoint... | \n","0.561000 | \n","0.689710 | \n","0.561000 | \n","0.608339 | \n","
3 | \n","1.000000 | \n","meta-llama/Meta-Llama-3-8B-Instruct/checkpoint... | \n","0.621000 | \n","0.686843 | \n","0.621000 | \n","0.641744 | \n","