{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "3bae1d7d-a2be-444d-97cc-d1cbf8843bf1",
   "metadata": {},
   "source": [
    "# Invisible RAG Pilot Demo App"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "2a8e18f7-cc88-4bbf-a6e1-095237ed7714",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Running on local URL:  http://127.0.0.1:7867\n",
      "\n",
      "To create a public link, set `share=True` in `launch()`.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div><iframe src=\"http://127.0.0.1:7867/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import json\n",
    "import gradio as gr\n",
    "\n",
    "\n",
    "class RAGInterface:\n",
    "    \"\"\"\n",
    "    Set up the gradio app for loading/saving/resetting the mockup A/B evaluation RAG tasks.\n",
    "    The app is deployed on Hugging Face spaces at https://huggingface.co/spaces/sukiboo/invisible-rag-demo\n",
    "\n",
    "    Every task is stored as a json file named `demo_task_{task_id}.json` inside `DATA_DIR`.\n",
    "    Constructing an instance builds the interface and launches it right away.\n",
    "    \"\"\"\n",
    "\n",
    "    # directory that holds the task json files\n",
    "    DATA_DIR = './data'\n",
    "    # per-answer rating fields, in the order the widgets are wired to the callbacks\n",
    "    RATING_KEYS = ('groundedness', 'fluency', 'utility', 'notes')\n",
    "    # radio choices shared by the rating widgets of both answers\n",
    "    QUALITY_CHOICES = ['Bad', 'Good', 'Perfect']\n",
    "    UTILITY_CHOICES = ['Catastrophic', 'Bad', 'Good', 'Perfect']\n",
    "\n",
    "    def __init__(self):\n",
    "        self.setup_interface()\n",
    "        self.launch_interface()\n",
    "\n",
    "    def setup_interface(self):\n",
    "        \"\"\"Configure the A/B Evaluation RAG task interface.\"\"\"\n",
    "        with gr.Blocks(title='Demo AB Evaluate RAG') as self.interface:\n",
    "\n",
    "            # hidden field holding the id of the loaded task: save/reset read it\n",
    "            # instead of the editable task id box\n",
    "            _task_id = gr.Textbox(label='Task ID', interactive=False, visible=False)\n",
    "\n",
    "            # task id and load/save/reset buttons\n",
    "            with gr.Row():\n",
    "                task_id = gr.Textbox(container=False, placeholder='Task ID', scale=9)\n",
    "                load_button = gr.Button('Load Task', scale=1)\n",
    "                save_button = gr.Button('Save Task', scale=1, variant='primary')\n",
    "                reset_button = gr.Button('Reset Task', scale=1, variant='stop')\n",
    "\n",
    "            # chat history and search results\n",
    "            chat = gr.Chatbot(height=700, layout='bubble', bubble_full_width=False, label='Chat History')\n",
    "            sources = gr.Markdown()\n",
    "\n",
    "            # model completions for answers 1 and 2\n",
    "            with gr.Row():\n",
    "                with gr.Column():\n",
    "                    answer1 = gr.Textbox(label='Answer 1', max_lines=50)\n",
    "                with gr.Column():\n",
    "                    answer2 = gr.Textbox(label='Answer 2', max_lines=50)\n",
    "\n",
    "            # individual ratings for answers 1 and 2\n",
    "            with gr.Row():\n",
    "                with gr.Column():\n",
    "                    ratings1 = self._rating_widgets()\n",
    "                with gr.Column():\n",
    "                    ratings2 = self._rating_widgets()\n",
    "\n",
    "            # overall rating\n",
    "            overall = gr.Radio(label='Overall Rating', choices=['#1 Better', 'Equally Bad', 'Equally Good', '#2 Better'])\n",
    "            notes = gr.Textbox(label='Notes', placeholder='A brief justification for the overall rating')\n",
    "\n",
    "            # input/output fields\n",
    "            answers = (answer1, answer2)\n",
    "            ratings = (*ratings1, *ratings2, overall, notes)\n",
    "\n",
    "            # button clicks\n",
    "            load_button.click(self.load_task, inputs=[task_id], outputs=[_task_id, chat, sources, *answers, *ratings])\n",
    "            save_button.click(self.save_task, inputs=[_task_id, *ratings], outputs=None)\n",
    "            reset_button.click(self.reset_task, inputs=[_task_id], outputs=[*ratings])\n",
    "\n",
    "    def _rating_widgets(self):\n",
    "        \"\"\"Create the rating widgets for one answer, ordered as in `RATING_KEYS`.\"\"\"\n",
    "        groundedness = gr.Radio(label='Groundedness', choices=list(self.QUALITY_CHOICES))\n",
    "        fluency = gr.Radio(label='Fluency', choices=list(self.QUALITY_CHOICES))\n",
    "        utility = gr.Radio(label='Utility', choices=list(self.UTILITY_CHOICES))\n",
    "        notes = gr.Textbox(label='Notes', placeholder='N/A')\n",
    "        return groundedness, fluency, utility, notes\n",
    "\n",
    "    def _task_path(self, task_id):\n",
    "        \"\"\"Return the path of the task json file; raises `gr.Error` for an unusable task id.\"\"\"\n",
    "        name = str(task_id)\n",
    "        # the id is typed by the user, so refuse empty ids and path separators\n",
    "        # (chr(92) is the backslash) rather than let them reach `open`\n",
    "        if not name or '/' in name or chr(92) in name:\n",
    "            raise gr.Error(f'Task demo_task_{task_id} is not found :(')\n",
    "        return f'{self.DATA_DIR}/demo_task_{name}.json'\n",
    "\n",
    "    def load_task(self, task_id):\n",
    "        \"\"\"\n",
    "        Load the task and parse the info.\n",
    "\n",
    "        Returns the loaded task id, chat, sources, both answers and all ratings, in the order\n",
    "        of the `load_button` outputs. Raises `gr.Error` if the task file cannot be read or\n",
    "        does not have the expected fields.\n",
    "        \"\"\"\n",
    "        task = self.read_task(task_id)\n",
    "        try:\n",
    "            loaded_id = task['id']\n",
    "            # the current question and its search query are shown as the last chat turn\n",
    "            chat = task['chat_history'] + [[task['question'], task['search_query']]]\n",
    "            answers = [task['answer_1'], task['answer_2']]\n",
    "            sources = self.load_sources(task)\n",
    "            ratings = self.load_ratings(task)\n",
    "        except Exception as err:\n",
    "            raise gr.Error(f'Could not load the task demo_task_{task_id} :(') from err\n",
    "        gr.Info(f'Task demo_task_{task_id} is loaded!')\n",
    "        return loaded_id, chat, sources, *answers, *ratings\n",
    "\n",
    "    def read_task(self, task_id):\n",
    "        \"\"\"Read the json task file; raises `gr.Error` if it is missing or cannot be parsed.\"\"\"\n",
    "        task_path = self._task_path(task_id)\n",
    "        try:\n",
    "            # the files are written as utf-8, so read them back the same way\n",
    "            with open(task_path, encoding='utf-8') as task_file:\n",
    "                return json.load(task_file)\n",
    "        except FileNotFoundError:\n",
    "            raise gr.Error(f'Task demo_task_{task_id} is not found :(')\n",
    "        except (OSError, ValueError) as err:\n",
    "            # ValueError covers both invalid json and undecodable bytes\n",
    "            raise gr.Error(f'Could not load the task demo_task_{task_id} :(') from err\n",
    "\n",
    "    def load_sources(self, task):\n",
    "        \"\"\"Parse the search results into markdown with one numbered heading per result.\"\"\"\n",
    "        sources = ['### Search Results']\n",
    "        for idx, source in enumerate(task['search_results'], start=1):\n",
    "            # backslash-escape `<`, presumably so the markdown widget does not treat it as html\n",
    "            escaped = source.replace('<', chr(92) + '<')\n",
    "            sources.append(f'##### {idx}. {escaped}')\n",
    "        return '\\n---\\n'.join(sources + [''])\n",
    "\n",
    "    def load_ratings(self, task):\n",
    "        \"\"\"Parse the ratings for each answer, followed by the overall rating and notes.\"\"\"\n",
    "        ratings1 = tuple(task['ratings_1'][key] for key in self.RATING_KEYS)\n",
    "        ratings2 = tuple(task['ratings_2'][key] for key in self.RATING_KEYS)\n",
    "        return (*ratings1, *ratings2, task['overall'], task['notes'])\n",
    "\n",
    "    def _apply_ratings(self, task, ratings):\n",
    "        \"\"\"Store the rating values, ordered as returned by `load_ratings`, in the task dict.\"\"\"\n",
    "        width = len(self.RATING_KEYS)\n",
    "        if len(ratings) != 2 * width + 2:\n",
    "            raise ValueError(f'expected {2 * width + 2} rating values, got {len(ratings)}')\n",
    "        task['ratings_1'].update(zip(self.RATING_KEYS, ratings[:width]))\n",
    "        task['ratings_2'].update(zip(self.RATING_KEYS, ratings[width:2 * width]))\n",
    "        task['overall'], task['notes'] = ratings[-2:]\n",
    "\n",
    "    def _write_task(self, task_id, task):\n",
    "        \"\"\"Serialize the task and overwrite its json file.\"\"\"\n",
    "        # serialize before opening, so a serialization error cannot leave a truncated file\n",
    "        payload = json.dumps(task, ensure_ascii=False, indent=4)\n",
    "        with open(self._task_path(task_id), 'w', encoding='utf-8') as task_file:\n",
    "            task_file.write(payload)\n",
    "\n",
    "    def save_task(self, task_id, *ratings):\n",
    "        \"\"\"\n",
    "        Save the ratings into the json file of the task, overwriting it.\n",
    "\n",
    "        `ratings` holds the ten widget values in the order returned by `load_ratings`.\n",
    "        Raises `gr.Error` if the task file is missing or cannot be updated.\n",
    "        \"\"\"\n",
    "        # load the original task\n",
    "        task = self.read_task(task_id)\n",
    "        try:\n",
    "            self._apply_ratings(task, ratings)\n",
    "            self._write_task(task_id, task)\n",
    "        except Exception as err:\n",
    "            raise gr.Error(f'Could not save the task demo_task_{task_id} :(') from err\n",
    "        gr.Info(f'Task demo_task_{task_id} is saved!')\n",
    "\n",
    "    def reset_task(self, task_id):\n",
    "        \"\"\"\n",
    "        Reset the task by erasing the ratings and operator notes.\n",
    "\n",
    "        Returns one empty string per rating widget so the interface is cleared as well.\n",
    "        Raises `gr.Error` if the task file is missing or cannot be updated.\n",
    "        \"\"\"\n",
    "        # load the original task\n",
    "        task = self.read_task(task_id)\n",
    "        blank = ('',) * (2 * len(self.RATING_KEYS) + 2)\n",
    "        try:\n",
    "            self._apply_ratings(task, blank)\n",
    "            self._write_task(task_id, task)\n",
    "        except Exception as err:\n",
    "            raise gr.Error(f'Could not reset the task demo_task_{task_id} :(') from err\n",
    "        gr.Warning(f'Task demo_task_{task_id} is reset!')\n",
    "        return blank\n",
    "\n",
    "    def launch_interface(self):\n",
    "        \"\"\"Launch the A/B Evaluation RAG task interface.\"\"\"\n",
    "        # close any gradio servers that are still running before starting this one\n",
    "        gr.close_all()\n",
    "        self.interface.queue(default_concurrency_limit=None)\n",
    "        self.interface.launch()\n",
    "\n",
    "\n",
    "# constructing the interface also launches the gradio app (see `__init__`)\n",
    "rag = RAGInterface()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ade1097d-35ce-4f7a-a689-1b51973cbc70",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "6707866e-8f1b-4bda-9b12-0008e289ab77",
   "metadata": {},
   "outputs": [],
   "source": [
    "# create placeholder tasks\n",
    "import os\n",
    "import json\n",
    "\n",
    "# the directory must exist before the task files can be written\n",
    "os.makedirs('./data/', exist_ok=True)\n",
    "\n",
    "# write three placeholder tasks that differ only in their id\n",
    "for idx in range(3):\n",
    "    task_number = idx + 1\n",
    "    blank_ratings = {'groundedness': '', 'utility': '', 'fluency': '', 'notes': ''}\n",
    "    task = {\n",
    "        'id': f'{task_number}',\n",
    "        'chat_history': [\n",
    "            ['user message 1', 'bot message 1'],\n",
    "            ['user message 2', 'bot message 2'],\n",
    "        ],\n",
    "        'question': 'question',\n",
    "        'search_query': 'search query',\n",
    "        'search_results': ['source 1', 'source 2', 'source 3'],\n",
    "        'answer_1': 'answer 1',\n",
    "        'answer_2': 'answer 2',\n",
    "        # each answer gets its own copy of the empty ratings\n",
    "        'ratings_1': dict(blank_ratings),\n",
    "        'ratings_2': dict(blank_ratings),\n",
    "        'overall': '',\n",
    "        'notes': '',\n",
    "    }\n",
    "    task_path = f'./data/demo_task_{task_number}.json'\n",
    "    with open(task_path, 'w', encoding='utf-8') as task_file:\n",
    "        task_file.write(json.dumps(task, ensure_ascii=False, indent=4))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d5023979-626b-4135-8805-3de1a846586e",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}