Spaces:

sukiboo
/

invisible-rag-demo

Sleeping

App Files Files Community

sukiboo committed on Mar 10, 2024

Commit

570952a

1 Parent(s): 349f8b3

initial app interface

Browse files

Files changed (1) hide show

app.ipynb +260 -0

app.ipynb ADDED Viewed

	@@ -0,0 +1,260 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "3bae1d7d-a2be-444d-97cc-d1cbf8843bf1",
+   "metadata": {},
+   "source": [
+    "# Invisible RAG Pilot Demo App"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "2a8e18f7-cc88-4bbf-a6e1-095237ed7714",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Running on local URL:  http://127.0.0.1:7861\n",
+      "\n",
+      "To create a public link, set `share=True` in `launch()`.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div><iframe src=\"http://127.0.0.1:7861/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "import json\n",
+    "import gradio as gr\n",
+    "\n",
+    "\n",
+    "class RAGInterface:\n",
+    "    \"\"\"\n",
+    "    Set up the gradio app for loading/saving/synchronizing the mockup A/B evaluation RAG tasks.\n",
+    "    The app is deployed on Hugging Face spaces at https://huggingface.co/spaces/sukiboo/invisible-rag-demo\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self):\n",
+    "        self.setup_interface()\n",
+    "        self.launch_interface()\n",
+    "\n",
+    "    def setup_interface(self):\n",
+    "        \"\"\"Configure the A/B Evaluation RAG task interface.\"\"\"\n",
+    "        with gr.Blocks(title='AB Evaluate RAG') as self.interface:\n",
+    "\n",
+    "            # protected fields\n",
+    "            _task_id = gr.Textbox(label='Task ID', interactive=False, visible=False)\n",
+    "\n",
+    "            # task id, load button, chat history, search results\n",
+    "            with gr.Row():\n",
+    "                task_id = gr.Textbox(container=False, placeholder='Task ID', scale=9)\n",
+    "                load_button = gr.Button('Load Task', scale=1)\n",
+    "            chat = gr.Chatbot(height=700, layout='bubble', label='Chat History')\n",
+    "            sources = gr.Markdown()\n",
+    "\n",
+    "            # model completions for answers 1 and 2\n",
+    "            with gr.Row():\n",
+    "                with gr.Column():\n",
+    "                    answer1 = gr.Textbox(label='Answer 1', max_lines=50)\n",
+    "                with gr.Column():\n",
+    "                    answer2 = gr.Textbox(label='Answer 2', max_lines=50)\n",
+    "\n",
+    "            # individual ratings for answers 1 and 2\n",
+    "            with gr.Row():\n",
+    "                with gr.Column():\n",
+    "                    groundedness1 = gr.Radio(label='Groundedness', choices=['Bad', 'Good', 'Perfect'])\n",
+    "                    fluency1 = gr.Radio(label='Fluency', choices=['Bad', 'Good', 'Perfect'])\n",
+    "                    utility1 = gr.Radio(label='Utility', choices=['Catastrophic', 'Bad', 'Good', 'Perfect'])\n",
+    "                    notes1 = gr.Textbox(label='Notes', placeholder='N/A')\n",
+    "                with gr.Column():\n",
+    "                    groundedness2 = gr.Radio(label='Groundedness', choices=['Bad', 'Good', 'Perfect'])\n",
+    "                    fluency2 = gr.Radio(label='Fluency', choices=['Bad', 'Good', 'Perfect'])\n",
+    "                    utility2 = gr.Radio(label='Utility', choices=['Catastrophic', 'Bad', 'Good', 'Perfect'])\n",
+    "                    notes2 = gr.Textbox(label='Notes', placeholder='N/A')\n",
+    "\n",
+    "            # overall rating\n",
+    "            overall = gr.Radio(label='Overall Rating', choices=['#1 Better', 'Equally Bad', 'Equally Good', '#2 Better'])\n",
+    "            notes = gr.Textbox(label='Notes', placeholder='A brief justification for the overall rating')\n",
+    "\n",
+    "            # save button\n",
+    "            save_button = gr.Button('Save Task')\n",
+    "\n",
+    "            # input/output fields\n",
+    "            answers = (answer1, answer2)\n",
+    "            ratings1 = (groundedness1, fluency1, utility1, notes1)\n",
+    "            ratings2 = (groundedness2, fluency2, utility2, notes2)\n",
+    "            ratings = (*ratings1, *ratings2, overall, notes)\n",
+    "\n",
+    "            # button clicks\n",
+    "            load_button.click(self.load_task, inputs=[task_id], outputs=[_task_id, chat, sources, *answers, *ratings])\n",
+    "            save_button.click(self.save_task, inputs=[_task_id, *ratings], outputs=None)\n",
+    "\n",
+    "    def load_task(self, task_id):\n",
+    "        \"\"\"Load the task and parse the info.\"\"\"\n",
+    "        task = self.read_task(task_id)\n",
+    "        try:\n",
+    "            id = task['id']\n",
+    "            chat = task['chat_history'] + [[task['question'], task['search_query']]]\n",
+    "            answers = [task['answer_1'], task['answer_2']]\n",
+    "            sources = self.load_sources(task)\n",
+    "            ratings = self.load_ratings(task)\n",
+    "            gr.Info(f'Task {task_id} is loaded!')\n",
+    "            return id, chat, sources, *answers, *ratings\n",
+    "        except:\n",
+    "            raise gr.Error(f'Could not load the task {task_id} :(')\n",
+    "\n",
+    "    def read_task(self, task_id):\n",
+    "        \"\"\"Read the json task file.\"\"\"\n",
+    "        try:\n",
+    "            with open(f'./data/{task_id}.json') as task_file:\n",
+    "                task = json.load(task_file)\n",
+    "            return task\n",
+    "        except FileNotFoundError:\n",
+    "            raise gr.Error(f'Task {task_id} is not found :(')\n",
+    "\n",
+    "    def load_sources(self, task):\n",
+    "        \"\"\"Parse the search results.\"\"\"\n",
+    "        sources = ['## Search Results']\n",
+    "        for idx, source in enumerate(task['search_results']):\n",
+    "            sources.append(f'### {idx+1}. {source.replace(\"<\", f\"{chr(92)}<\")}')\n",
+    "        return '\\n\\n---\\n\\n'.join(sources + [''])\n",
+    "\n",
+    "    def load_ratings(self, task):\n",
+    "        \"\"\"Parse the ratings for each answer.\"\"\"\n",
+    "        # load ratings for answer 1\n",
+    "        ratings1 = (task['ratings_1']['groundedness'],\n",
+    "                    task['ratings_1']['fluency'],\n",
+    "                    task['ratings_1']['utility'],\n",
+    "                    task['ratings_1']['notes'])\n",
+    "        # load ratings for answer 2\n",
+    "        ratings2 = (task['ratings_2']['groundedness'],\n",
+    "                    task['ratings_2']['fluency'],\n",
+    "                    task['ratings_2']['utility'],\n",
+    "                    task['ratings_2']['notes'])\n",
+    "        # load overall ratings\n",
+    "        overall = task['overall']\n",
+    "        notes = task['notes']\n",
+    "        return (*ratings1, *ratings2, overall, notes)\n",
+    "\n",
+    "    def save_task(self, task_id, *ratings):\n",
+    "        \"\"\"Save the updated ratings back into the task's json file.\"\"\"\n",
+    "        # load the original task\n",
+    "        with open(f'./data/{task_id}.json') as task_file:\n",
+    "            task = json.load(task_file)\n",
+    "        # parse the ratings\n",
+    "        groundedness1, fluency1, utility1, notes1, \\\n",
+    "        groundedness2, fluency2, utility2, notes2, \\\n",
+    "        overall, notes = ratings\n",
+    "        # update the ratings for answer 1\n",
+    "        task['ratings_1']['groundedness'] = groundedness1\n",
+    "        task['ratings_1']['fluency'] = fluency1\n",
+    "        task['ratings_1']['utility'] = utility1\n",
+    "        task['ratings_1']['notes'] = notes1\n",
+    "        # update the ratings for answer 2\n",
+    "        task['ratings_2']['groundedness'] = groundedness2\n",
+    "        task['ratings_2']['fluency'] = fluency2\n",
+    "        task['ratings_2']['utility'] = utility2\n",
+    "        task['ratings_2']['notes'] = notes2\n",
+    "        # update overall ratings\n",
+    "        task['overall'] = overall\n",
+    "        task['notes'] = notes\n",
+    "        # save the task to json file\n",
+    "        try:\n",
+    "            with open(f'./data/{task_id}.json', 'w', encoding='utf-8') as task_file:\n",
+    "                json.dump(task, task_file, ensure_ascii=False, indent=4)\n",
+    "            gr.Info(f'Task {task_id} is saved!')\n",
+    "        except:\n",
+    "            raise gr.Error(f'Could not save the task {task_id} :(')\n",
+    "\n",
+    "    def launch_interface(self):\n",
+    "        \"\"\"Launch the A/B Evaluation RAG task interface.\"\"\"\n",
+    "        gr.close_all()\n",
+    "        self.interface.queue(default_concurrency_limit=None)\n",
+    "        self.interface.launch()\n",
+    "\n",
+    "\n",
+    "rag = RAGInterface()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ade1097d-35ce-4f7a-a689-1b51973cbc70",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "6707866e-8f1b-4bda-9b12-0008e289ab77",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create placeholder tasks\n",
+    "import os\n",
+    "import json\n",
+    "\n",
+    "os.makedirs('./data/', exist_ok=True)\n",
+    "for idx in range(3):\n",
+    "    task = {\n",
+    "        'id': f'demo_task_{idx+1}',\n",
+    "        'chat_history': [['user message 1', 'bot message 1'], ['user message 2', 'bot message 2']],\n",
+    "        'question': 'question',\n",
+    "        'search_query': 'search query',\n",
+    "        'search_results': ['source 1', 'source 2', 'source 3'],\n",
+    "        'answer_1': 'answer 1',\n",
+    "        'answer_2': 'answer 2',\n",
+    "        'ratings_1': {'groundedness': 'null', 'utility': 'null', 'fluency': 'null', 'notes': ''},\n",
+    "        'ratings_2': {'groundedness': 'null', 'utility': 'null', 'fluency': 'null', 'notes': ''},\n",
+    "        'overall': 'null',\n",
+    "        'notes': ''\n",
+    "    }\n",
+    "    with open(f'./data/demo_task_{idx+1}.json', 'w', encoding='utf-8') as task_file:\n",
+    "        json.dump(task, task_file, ensure_ascii=False, indent=4)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d5023979-626b-4135-8805-3de1a846586e",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}