Spaces:
No application file
No application file
File size: 3,036 Bytes
477fa2f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Building an eval for LAMBADA\n",
"\n",
"We show how to build an eval for the LAMBADA dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Download LAMBADA from https://zenodo.org/record/2630551 and place in examples/lambada-dataset\n",
"!ls lambada-dataset\n",
"import os\n",
"import pandas as pd\n",
"\n",
"registry_pth = os.path.join(\"..\", \"evals\", \"registry\")\n",
"os.makedirs(os.path.join(registry_pth, \"data\", \"lambada\"), exist_ok=True)\n",
"\n",
"def create_chat_prompt(text):\n",
" return [\n",
" {\"role\": \"system\", \"content\": \"Please complete the passages with the correct next word.\"}, \n",
" {\"role\": \"user\", \"content\": text}\n",
" ]\n",
"\n",
"df = pd.read_csv('lambada-dataset/lambada_test_plain_text.txt', sep=\"\\t\", names=[\"text\"])\n",
"df[\"text\"] = df[\"text\"].str.split(\" \")\n",
"df[\"input\"], df[\"ideal\"] = df[\"text\"].str[:-1].str.join(\" \").apply(create_chat_prompt), df[\"text\"].str[-1]\n",
"df = df[[\"input\", \"ideal\"]]\n",
"df.to_json(os.path.join(registry_pth, \"data/lambada/samples.jsonl\"), orient=\"records\", lines=True)\n",
"display(df.head())\n",
"\n",
"eval_yaml = \"\"\"\n",
"lambada:\n",
" id: lambada.test.v1\n",
" metrics: [accuracy]\n",
"lambada.test.v1:\n",
" class: evals.elsuite.basic.match:Match\n",
" args:\n",
" samples_jsonl: lambada/samples.jsonl\n",
"\"\"\".strip()\n",
"with open(os.path.join(registry_pth, \"evals\", \"lambada.yaml\"), \"w\") as f:\n",
" f.write(eval_yaml)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!oaieval gpt-3.5-turbo lambada --max_samples 20"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Inspect samples\n",
"log_path = None # Set to jsonl path to logs from oaieval\n",
"events = f\"/tmp/evallogs/{log_path}\"\n",
"with open(events, \"r\") as f:\n",
" events_df = pd.read_json(f, lines=True)\n",
"for i, r in pd.json_normalize(events_df[events_df.type == \"sampling\"].data).iterrows():\n",
" print(r)\n",
" print(f\"Prompt: {r.prompt}\")\n",
" print(f\"Sampled: {r.sampled}\")\n",
" print(\"-\" * 25)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
|