Spaces:

sukiboo
/

invisible-rag-demo

Sleeping

File size: 16,802 Bytes

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "3bae1d7d-a2be-444d-97cc-d1cbf8843bf1",
   "metadata": {},
   "source": [
    "# Invisible RAG Pilot Demo App"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2a8e18f7-cc88-4bbf-a6e1-095237ed7714",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import gspread\n",
    "import gradio as gr\n",
    "\n",
    "\n",
    "class RAGInterface:\n",
    "    \"\"\"\n",
    "    Setup the gradio app for loading/saving/syncronizing the mockup A/B evaluation RAG tasks.\n",
    "    The app is deployed on Hugging Face spaces at https://huggingface.co/spaces/sukiboo/invisible-rag-demo\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self):\n",
    "        self.setup_interface()\n",
    "        self.launch_interface()\n",
    "\n",
    "    def setup_interface(self):\n",
    "        \"\"\"Configure the A/B Evaluation RAG task interface.\"\"\"\n",
    "        with gr.Blocks(title='Demo AB Evaluate RAG') as self.interface:\n",
    "\n",
    "            # protected fields\n",
    "            _task_id = gr.Textbox(label='Task ID', interactive=False, visible=False)\n",
    "\n",
    "            # task id and load/save/reset buttons\n",
    "            with gr.Row():\n",
    "                task_id = gr.Textbox(container=False, placeholder='Enter a task ID: 1--11', scale=9)\n",
    "                load_button = gr.Button('Load Task', scale=1)\n",
    "                save_button = gr.Button('Save Task', scale=1, variant='primary')\n",
    "                reset_button = gr.Button('Reset Task', scale=1, variant='stop')\n",
    "\n",
    "            # chat history and search results\n",
    "            chat = gr.Chatbot(height=700, layout='bubble', bubble_full_width=False, label='Chat History')\n",
    "            sources = gr.Markdown()\n",
    "\n",
    "            # model completions for answers 1 and 2\n",
    "            with gr.Row():\n",
    "                with gr.Column():\n",
    "                    answer1 = gr.Textbox(label='Answer 1', max_lines=50)\n",
    "                with gr.Column():\n",
    "                    answer2 = gr.Textbox(label='Answer 2', max_lines=50)\n",
    "\n",
    "            # individual ratings for answers 1 and 2\n",
    "            with gr.Row():\n",
    "                with gr.Column():\n",
    "                    groundedness1 = gr.Radio(label='Groundedness', choices=['Bad', 'Good', 'Perfect'])\n",
    "                    fluency1 = gr.Radio(label='Fluency', choices=['Bad', 'Good', 'Perfect'])\n",
    "                    utility1 = gr.Radio(label='Utility', choices=['Catastrophic', 'Bad', 'Good', 'Perfect'])\n",
    "                    notes1 = gr.Textbox(label='Notes', placeholder='N/A')\n",
    "                with gr.Column():\n",
    "                    groundedness2 = gr.Radio(label='Groundedness', choices=['Bad', 'Good', 'Perfect'])\n",
    "                    fluency2 = gr.Radio(label='Fluency', choices=['Bad', 'Good', 'Perfect'])\n",
    "                    utility2 = gr.Radio(label='Utility', choices=['Catastrophic', 'Bad', 'Good', 'Perfect'])\n",
    "                    notes2 = gr.Textbox(label='Notes', placeholder='N/A')\n",
    "\n",
    "            # overall rating\n",
    "            overall = gr.Radio(label='Overall Rating', choices=['#1 Better', 'Equally Bad', 'Equally Good', '#2 Better'])\n",
    "            notes = gr.Textbox(label='Notes', placeholder='A brief justification for the overall rating')\n",
    "\n",
    "            # input/output fields\n",
    "            answers = (answer1, answer2)\n",
    "            ratings1 = (groundedness1, fluency1, utility1, notes1)\n",
    "            ratings2 = (groundedness2, fluency2, utility2, notes2)\n",
    "            ratings = (*ratings1, *ratings2, overall, notes)\n",
    "\n",
    "            # button clicks\n",
    "            load_button.click(self.load_task, inputs=[task_id], outputs=[_task_id, chat, sources, *answers, *ratings])\n",
    "            save_button.click(self.save_task, inputs=[_task_id, *ratings], outputs=None)\n",
    "            reset_button.click(self.reset_task, inputs=[_task_id], outputs=[*ratings])\n",
    "\n",
    "    def load_task(self, task_id):\n",
    "        \"\"\"Load the task and parse the info.\"\"\"\n",
    "        task = self.read_task(task_id)\n",
    "        try:\n",
    "            id = task['id']\n",
    "            chat = task['chat_history'] + [[task['question'], task['search_query']]]\n",
    "            answers = [task['answer_1'], task['answer_2']]\n",
    "            sources = self.load_sources(task)\n",
    "            ratings = self.load_ratings(task)\n",
    "            gr.Info(f'Task demo_task_{task_id} is loaded!')\n",
    "            return id, chat, sources, *answers, *ratings\n",
    "        except:\n",
    "            raise gr.Error(f'Could not load the task demo_task_{task_id} :(')\n",
    "\n",
    "    def read_task(self, task_id):\n",
    "        \"\"\"Read the json task file.\"\"\"\n",
    "        try:\n",
    "            with open(f'./data/demo_task_{task_id}.json') as task_file:\n",
    "                task = json.load(task_file)\n",
    "            return task\n",
    "        except FileNotFoundError:\n",
    "            raise gr.Error(f'Task demo_task_{task_id} is not found :(')\n",
    "\n",
    "    def load_sources(self, task):\n",
    "        \"\"\"Parse the search results.\"\"\"\n",
    "        sources = []\n",
    "        for idx, source in enumerate(task['search_results']):\n",
    "            sources.append(f'##### {idx+1}. {source.replace(\"<\", f\"{chr(92)}<\")}\\n')\n",
    "        return '\\n---\\n'.join(['## Search Results'] + sources + ['']) if sources else ''\n",
    "\n",
    "    def load_ratings(self, task):\n",
    "        \"\"\"Parse the ratings for each answer.\"\"\"\n",
    "        # load ratings for answer 1\n",
    "        ratings1 = (task['ratings_1']['groundedness'],\n",
    "                    task['ratings_1']['fluency'],\n",
    "                    task['ratings_1']['utility'],\n",
    "                    task['ratings_1']['notes'])\n",
    "        # load ratings for answer 2\n",
    "        ratings2 = (task['ratings_2']['groundedness'],\n",
    "                    task['ratings_2']['fluency'],\n",
    "                    task['ratings_2']['utility'],\n",
    "                    task['ratings_2']['notes'])\n",
    "        # load overall ratings\n",
    "        overall = task['overall']\n",
    "        notes = task['notes']\n",
    "        return (*ratings1, *ratings2, overall, notes)\n",
    "\n",
    "    def save_task(self, task_id, *ratings):\n",
    "        \"\"\"Save the task into a new json file.\"\"\"\n",
    "        # load the original task\n",
    "        with open(f'./data/demo_task_{task_id}.json') as task_file:\n",
    "            task = json.load(task_file)\n",
    "        # parse the ratings\n",
    "        groundedness1, fluency1, utility1, notes1, \\\n",
    "        groundedness2, fluency2, utility2, notes2, \\\n",
    "        overall, notes = ratings\n",
    "        # update the ratings for answer 1\n",
    "        task['ratings_1']['groundedness'] = groundedness1\n",
    "        task['ratings_1']['fluency'] = fluency1\n",
    "        task['ratings_1']['utility'] = utility1\n",
    "        task['ratings_1']['notes'] = notes1\n",
    "        # update the ratings for answer 2\n",
    "        task['ratings_2']['groundedness'] = groundedness2\n",
    "        task['ratings_2']['fluency'] = fluency2\n",
    "        task['ratings_2']['utility'] = utility2\n",
    "        task['ratings_2']['notes'] = notes2\n",
    "        # update overall ratings\n",
    "        task['overall'] = overall\n",
    "        task['notes'] = notes\n",
    "        try:\n",
    "            # save the task to json file\n",
    "            with open(f'./data/demo_task_{task_id}.json', 'w', encoding='utf-8') as task_file:\n",
    "                json.dump(task, task_file, ensure_ascii=False, indent=4)\n",
    "            # save the task to google spreadsheet\n",
    "            self.save_gsheet(task_id, ratings)\n",
    "            gr.Info(f'Task demo_task_{task_id} is saved!')\n",
    "        except:\n",
    "            raise gr.Error(f'Could not save the task demo_task_{task_id} :(')\n",
    "\n",
    "    def reset_task(self, task_id):\n",
    "        \"\"\"Reset the task by erasing the ratings and operator notes.\"\"\"\n",
    "        # load the original task\n",
    "        with open(f'./data/demo_task_{task_id}.json') as task_file:\n",
    "            task = json.load(task_file)\n",
    "        # erase the ratings for answer 1\n",
    "        task['ratings_1']['groundedness'] = ''\n",
    "        task['ratings_1']['fluency'] = ''\n",
    "        task['ratings_1']['utility'] = ''\n",
    "        task['ratings_1']['notes'] = ''\n",
    "        # erase the ratings for answer 2\n",
    "        task['ratings_2']['groundedness'] = ''\n",
    "        task['ratings_2']['fluency'] = ''\n",
    "        task['ratings_2']['utility'] = ''\n",
    "        task['ratings_2']['notes'] = ''\n",
    "        # erase overall ratings\n",
    "        task['overall'] = ''\n",
    "        task['notes'] = ''\n",
    "        try:\n",
    "            # save the reset task to json file\n",
    "            with open(f'./data/demo_task_{task_id}.json', 'w', encoding='utf-8') as task_file:\n",
    "                json.dump(task, task_file, ensure_ascii=False, indent=4)\n",
    "            # save the reset task to google spreadsheet\n",
    "            self.reset_gsheet(task_id)\n",
    "            gr.Info(f'Task demo_task_{task_id} is reset!')\n",
    "        except:\n",
    "            raise gr.Error(f'Could not reset the task demo_task_{task_id} :(')\n",
    "        return '', '', '', '', '', '', '', '', '', ''\n",
    "\n",
    "    def save_gsheet(self, task_id, ratings):\n",
    "        \"\"\"Save the task to google spreadsheet.\"\"\"\n",
    "        try:\n",
    "            # configure gsheet credentials\n",
    "            gc = gspread.service_account('./gsheet_service_account.json')\n",
    "            sheet_id = '1D2sfE9YXKtd7cKlgalo5UnuNKC-GhxlGqHVYUlkQlCY'\n",
    "            sh = gc.open_by_key(sheet_id).worksheet('demo-app')\n",
    "            # update non-empty task ratings in the worksheet\n",
    "            cell_val = []\n",
    "            for idx, rating in enumerate([ratings[idx] for idx in [0,1,2,4,5,6,8,9]]):\n",
    "                if rating:\n",
    "                    cell_val.append({'range': f'{chr(67+idx)}{3+int(task_id)}', 'values': [[f'{rating}']]})\n",
    "            sh.batch_update(cell_val)\n",
    "        except:\n",
    "            gr.Warning(f'Could not save the task demo_task_{task_id} to the spreadsheet :(')\n",
    "\n",
    "    def reset_gsheet(self, task_id):\n",
    "        \"\"\"Reset the task ratings in google spreadsheet.\"\"\"\n",
    "        try:\n",
    "            # configure gsheet credentials\n",
    "            gc = gspread.service_account('./gsheet_service_account.json')\n",
    "            sheet_id = '1D2sfE9YXKtd7cKlgalo5UnuNKC-GhxlGqHVYUlkQlCY'\n",
    "            sh = gc.open_by_key(sheet_id).worksheet('demo-app')\n",
    "            # update task ratings in the worksheet if there are any (clearing empty values removes formatting)\n",
    "            if sh.get(f'C{3+int(task_id)}:J{3+int(task_id)}')[0]:\n",
    "                sh.batch_clear([f'C{3+int(task_id)}:J{3+int(task_id)}'])\n",
    "        except:\n",
    "            gr.Warning(f'Could not reset the task demo_task_{task_id} in the spreadsheet :(')\n",
    "\n",
    "    def launch_interface(self):\n",
    "        \"\"\"Launch the A/B Evaluation RAG task interface.\"\"\"\n",
    "        gr.close_all()\n",
    "        self.interface.queue(default_concurrency_limit=None)\n",
    "        self.interface.launch()\n",
    "\n",
    "\n",
    "rag = RAGInterface()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ade1097d-35ce-4f7a-a689-1b51973cbc70",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6707866e-8f1b-4bda-9b12-0008e289ab77",
   "metadata": {},
   "outputs": [],
   "source": [
    "# create placeholder tasks\n",
    "import os\n",
    "import json\n",
    "\n",
    "os.makedirs('./data/', exist_ok=True)\n",
    "for idx in range(1):\n",
    "    task = {\n",
    "        'id': f'{idx}',\n",
    "        'chat_history': [['user message 1', 'bot message 1'], ['user message 2', 'bot message 2']],\n",
    "        'question': 'question',\n",
    "        'search_query': 'search query',\n",
    "        'search_results': ['source 1', 'source 2', 'source 3'],\n",
    "        'answer_1': 'answer 1',\n",
    "        'answer_2': 'answer 2',\n",
    "        'ratings_1': {'groundedness': '', 'utility': '', 'fluency': '', 'notes': ''},\n",
    "        'ratings_2': {'groundedness': '', 'utility': '', 'fluency': '', 'notes': ''},\n",
    "        'overall': '',\n",
    "        'notes': ''\n",
    "    }\n",
    "    with open(f'./data/demo_task_{idx}.json', 'w', encoding='utf-8') as task_file:\n",
    "        json.dump(task, task_file, ensure_ascii=False, indent=4)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e99c2d79-d544-4d30-ab22-6452385d3593",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "98f44682-2088-4925-8e2a-563197a50b66",
   "metadata": {},
   "outputs": [],
   "source": [
    "# make demo tasks from the csv of the spreadsheet\n",
    "# https://docs.google.com/spreadsheets/d/1kYW0cABv2C-mMmmw2Uc50mQC0MmOuoqKJQaBp7IyCho/edit#gid=1934745276\n",
    "import os\n",
    "import json\n",
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv('./dev.csv')\n",
    "df\n",
    "\n",
    "os.makedirs('./data/', exist_ok=True)\n",
    "for idx in range(len(df)):\n",
    "    row = df.iloc[idx]\n",
    "    task = {\n",
    "        'id': f'{idx+1}',\n",
    "        'chat_history': [],\n",
    "        'question': f'{row[\"question\"]}',\n",
    "        'search_query': '',\n",
    "        'search_results': [],\n",
    "        'answer_1': f'{row[\"answer_1\"]}',\n",
    "        'answer_2': f'{row[\"answer_2\"]}',\n",
    "        'ratings_1': {'groundedness': '', 'utility': '', 'fluency': '', 'notes': ''},\n",
    "        'ratings_2': {'groundedness': '', 'utility': '', 'fluency': '', 'notes': ''},\n",
    "        'overall': '',\n",
    "        'notes': ''\n",
    "    }\n",
    "\n",
    "    # chat history\n",
    "    try:\n",
    "        i = 1\n",
    "        while not pd.isna(row[f'user message {i}']):\n",
    "            task['chat_history'].append([row[f'user message {i}'], row[f'bot message {i}']])\n",
    "            i += 1\n",
    "    except:\n",
    "        pass\n",
    "\n",
    "    # search query\n",
    "    if not pd.isna(row['search_2']):\n",
    "        task['search_query'] = f'{row[\"search_1\"]}\\n{row[\"search_2\"]}'\n",
    "    else:\n",
    "        task['search_query'] = f'{row[\"search_1\"]}'\n",
    "\n",
    "    # search results\n",
    "    try:\n",
    "        i = 1\n",
    "        while not pd.isna(row[f'source {i}']):\n",
    "            task['search_results'].append(row[f'source {i}'])\n",
    "            i += 1\n",
    "    except:\n",
    "        pass\n",
    "\n",
    "    # save the task\n",
    "    with open(f'./data/demo_task_{idx+1}.json', 'w', encoding='utf-8') as task_file:\n",
    "        json.dump(task, task_file, ensure_ascii=False, indent=4)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d293fcca-659d-41c5-b043-15fd2e57b216",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9ab3763f-fa7b-406b-9bc4-22bc4f7a4ea3",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d5023979-626b-4135-8805-3de1a846586e",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}