{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Building an eval for LAMBADA\n",
    "\n",
    "We show how to build an eval for the LAMBADA dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Download LAMBADA from https://zenodo.org/record/2630551 and place in examples/lambada-dataset\n",
    "!ls lambada-dataset\n",
    "import csv\n",
    "import os\n",
    "import pandas as pd\n",
    "\n",
    "registry_pth = os.path.join(\"..\", \"evals\", \"registry\")\n",
    "# Create both registry folders this cell writes into, so it also works when they do not exist yet.\n",
    "os.makedirs(os.path.join(registry_pth, \"data\", \"lambada\"), exist_ok=True)\n",
    "os.makedirs(os.path.join(registry_pth, \"evals\"), exist_ok=True)\n",
    "\n",
    "def create_chat_prompt(text):\n",
    "    \"\"\"Return a two-message chat prompt: a fixed system instruction plus `text` as the user message.\"\"\"\n",
    "    return [\n",
    "        {\"role\": \"system\", \"content\": \"Please complete the passages with the correct next word.\"},\n",
    "        {\"role\": \"user\", \"content\": text},\n",
    "    ]\n",
    "\n",
    "# Read one passage per line. QUOTE_NONE keeps double-quote characters as literal text;\n",
    "# with the default quoting a passage that starts with a double quote would be parsed as a quoted CSV field.\n",
    "passages = pd.read_csv(\n",
    "    \"lambada-dataset/lambada_test_plain_text.txt\",\n",
    "    sep=\"\\t\",\n",
    "    names=[\"text\"],\n",
    "    quoting=csv.QUOTE_NONE,\n",
    ")\n",
    "\n",
    "# The last space-separated token is the expected answer; everything before it is the prompt.\n",
    "tokens = passages[\"text\"].str.split(\" \")\n",
    "df = pd.DataFrame(\n",
    "    {\n",
    "        \"input\": tokens.str[:-1].str.join(\" \").apply(create_chat_prompt),\n",
    "        \"ideal\": tokens.str[-1],\n",
    "    }\n",
    ")\n",
    "df.to_json(os.path.join(registry_pth, \"data\", \"lambada\", \"samples.jsonl\"), orient=\"records\", lines=True)\n",
    "display(df.head())\n",
    "\n",
    "# Registry entry for the eval; `samples_jsonl` must be nested under `args`.\n",
    "eval_yaml = \"\"\"\n",
    "lambada:\n",
    "  id: lambada.test.v1\n",
    "  metrics: [accuracy]\n",
    "lambada.test.v1:\n",
    "  class: evals.elsuite.basic.match:Match\n",
    "  args:\n",
    "    samples_jsonl: lambada/samples.jsonl\n",
    "\"\"\".strip()\n",
    "with open(os.path.join(registry_pth, \"evals\", \"lambada.yaml\"), \"w\") as f:\n",
    "    f.write(eval_yaml)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!oaieval gpt-3.5-turbo lambada --max_samples 20"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Inspect samples\n",
    "log_path = None # Set to jsonl path to logs from oaieval\n",
    "# Fail early with a clear message instead of trying to open \"/tmp/evallogs/None\".\n",
    "if log_path is None:\n",
    "    raise ValueError(\"Set log_path to the jsonl log file written by oaieval before running this cell\")\n",
    "events = f\"/tmp/evallogs/{log_path}\"\n",
    "with open(events, \"r\") as f:\n",
    "    events_df = pd.read_json(f, lines=True)\n",
    "# Keep only the \"sampling\" events and flatten their `data` payload into columns.\n",
    "for i, r in pd.json_normalize(events_df[events_df.type == \"sampling\"].data).iterrows():\n",
    "    print(r)\n",
    "    print(f\"Prompt: {r.prompt}\")\n",
    "    print(f\"Sampled: {r.sampled}\")\n",
    "    print(\"-\" * 25)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}