sukiboo committed on
Commit
570952a
·
1 Parent(s): 349f8b3

initial app interface

Browse files
Files changed (1) hide show
  1. app.ipynb +260 -0
app.ipynb ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "3bae1d7d-a2be-444d-97cc-d1cbf8843bf1",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Invisible RAG Pilot Demo App"
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "code",
13
+ "execution_count": 3,
14
+ "id": "2a8e18f7-cc88-4bbf-a6e1-095237ed7714",
15
+ "metadata": {},
16
+ "outputs": [
17
+ {
18
+ "name": "stdout",
19
+ "output_type": "stream",
20
+ "text": [
21
+ "Running on local URL: http://127.0.0.1:7861\n",
22
+ "\n",
23
+ "To create a public link, set `share=True` in `launch()`.\n"
24
+ ]
25
+ },
26
+ {
27
+ "data": {
28
+ "text/html": [
29
+ "<div><iframe src=\"http://127.0.0.1:7861/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
30
+ ],
31
+ "text/plain": [
32
+ "<IPython.core.display.HTML object>"
33
+ ]
34
+ },
35
+ "metadata": {},
36
+ "output_type": "display_data"
37
+ }
38
+ ],
39
+ "source": [
40
+ "import json\n",
41
+ "import gradio as gr\n",
42
+ "\n",
43
+ "\n",
44
+ "class RAGInterface:\n",
45
+ " \"\"\"\n",
46
+ " Set up the gradio app for loading/saving/synchronizing the mockup A/B evaluation RAG tasks.\n",
47
+ " The app is deployed on Hugging Face spaces at https://huggingface.co/spaces/sukiboo/invisible-rag-demo\n",
48
+ " \"\"\"\n",
49
+ "\n",
50
+ " def __init__(self):\n",
51
+ " self.setup_interface()\n",
52
+ " self.launch_interface()\n",
53
+ "\n",
54
+ " def setup_interface(self):\n",
55
+ " \"\"\"Configure the A/B Evaluation RAG task interface.\"\"\"\n",
56
+ " with gr.Blocks(title='AB Evaluate RAG') as self.interface:\n",
57
+ "\n",
58
+ " # protected fields\n",
59
+ " _task_id = gr.Textbox(label='Task ID', interactive=False, visible=False)\n",
60
+ "\n",
61
+ " # task id, load button, chat history, search results\n",
62
+ " with gr.Row():\n",
63
+ " task_id = gr.Textbox(container=False, placeholder='Task ID', scale=9)\n",
64
+ " load_button = gr.Button('Load Task', scale=1)\n",
65
+ " chat = gr.Chatbot(height=700, layout='bubble', label='Chat History')\n",
66
+ " sources = gr.Markdown()\n",
67
+ "\n",
68
+ " # model completions for answers 1 and 2\n",
69
+ " with gr.Row():\n",
70
+ " with gr.Column():\n",
71
+ " answer1 = gr.Textbox(label='Answer 1', max_lines=50)\n",
72
+ " with gr.Column():\n",
73
+ " answer2 = gr.Textbox(label='Answer 2', max_lines=50)\n",
74
+ "\n",
75
+ " # individual ratings for answers 1 and 2\n",
76
+ " with gr.Row():\n",
77
+ " with gr.Column():\n",
78
+ " groundedness1 = gr.Radio(label='Groundedness', choices=['Bad', 'Good', 'Perfect'])\n",
79
+ " fluency1 = gr.Radio(label='Fluency', choices=['Bad', 'Good', 'Perfect'])\n",
80
+ " utility1 = gr.Radio(label='Utility', choices=['Catastrophic', 'Bad', 'Good', 'Perfect'])\n",
81
+ " notes1 = gr.Textbox(label='Notes', placeholder='N/A')\n",
82
+ " with gr.Column():\n",
83
+ " groundedness2 = gr.Radio(label='Groundedness', choices=['Bad', 'Good', 'Perfect'])\n",
84
+ " fluency2 = gr.Radio(label='Fluency', choices=['Bad', 'Good', 'Perfect'])\n",
85
+ " utility2 = gr.Radio(label='Utility', choices=['Catastrophic', 'Bad', 'Good', 'Perfect'])\n",
86
+ " notes2 = gr.Textbox(label='Notes', placeholder='N/A')\n",
87
+ "\n",
88
+ " # overall rating\n",
89
+ " overall = gr.Radio(label='Overall Rating', choices=['#1 Better', 'Equally Bad', 'Equally Good', '#2 Better'])\n",
90
+ " notes = gr.Textbox(label='Notes', placeholder='A brief justification for the overall rating')\n",
91
+ "\n",
92
+ " # save button\n",
93
+ " save_button = gr.Button('Save Task')\n",
94
+ "\n",
95
+ " # input/output fields\n",
96
+ " answers = (answer1, answer2)\n",
97
+ " ratings1 = (groundedness1, fluency1, utility1, notes1)\n",
98
+ " ratings2 = (groundedness2, fluency2, utility2, notes2)\n",
99
+ " ratings = (*ratings1, *ratings2, overall, notes)\n",
100
+ "\n",
101
+ " # button clicks\n",
102
+ " load_button.click(self.load_task, inputs=[task_id], outputs=[_task_id, chat, sources, *answers, *ratings])\n",
103
+ " save_button.click(self.save_task, inputs=[_task_id, *ratings], outputs=None)\n",
104
+ "\n",
105
+ " def load_task(self, task_id):\n",
106
+ " \"\"\"Load the task and parse the info.\"\"\"\n",
107
+ " task = self.read_task(task_id)\n",
108
+ " try:\n",
109
+ " id = task['id']\n",
110
+ " chat = task['chat_history'] + [[task['question'], task['search_query']]]\n",
111
+ " answers = [task['answer_1'], task['answer_2']]\n",
112
+ " sources = self.load_sources(task)\n",
113
+ " ratings = self.load_ratings(task)\n",
114
+ " gr.Info(f'Task {task_id} is loaded!')\n",
115
+ " return id, chat, sources, *answers, *ratings\n",
116
+ " except:\n",
117
+ " raise gr.Error(f'Could not load the task {task_id} :(')\n",
118
+ "\n",
119
+ " def read_task(self, task_id):\n",
120
+ " \"\"\"Read the json task file.\"\"\"\n",
121
+ " try:\n",
122
+ " with open(f'./data/{task_id}.json') as task_file:\n",
123
+ " task = json.load(task_file)\n",
124
+ " return task\n",
125
+ " except FileNotFoundError:\n",
126
+ " raise gr.Error(f'Task {task_id} is not found :(')\n",
127
+ "\n",
128
+ " def load_sources(self, task):\n",
129
+ " \"\"\"Parse the search results.\"\"\"\n",
130
+ " sources = ['## Search Results']\n",
131
+ " for idx, source in enumerate(task['search_results']):\n",
132
+ " sources.append(f'### {idx+1}. {source.replace(\"<\", f\"{chr(92)}<\")}')\n",
133
+ " return '\\n\\n---\\n\\n'.join(sources + [''])\n",
134
+ "\n",
135
+ " def load_ratings(self, task):\n",
136
+ " \"\"\"Parse the ratings for each answer.\"\"\"\n",
137
+ " # load ratings for answer 1\n",
138
+ " ratings1 = (task['ratings_1']['groundedness'],\n",
139
+ " task['ratings_1']['fluency'],\n",
140
+ " task['ratings_1']['utility'],\n",
141
+ " task['ratings_1']['notes'])\n",
142
+ " # load ratings for answer 2\n",
143
+ " ratings2 = (task['ratings_2']['groundedness'],\n",
144
+ " task['ratings_2']['fluency'],\n",
145
+ " task['ratings_2']['utility'],\n",
146
+ " task['ratings_2']['notes'])\n",
147
+ " # load overall ratings\n",
148
+ " overall = task['overall']\n",
149
+ " notes = task['notes']\n",
150
+ " return (*ratings1, *ratings2, overall, notes)\n",
151
+ "\n",
152
+ " def save_task(self, task_id, *ratings):\n",
153
+ " \"\"\"Save the task into a new json file.\"\"\"\n",
154
+ " # load the original task\n",
155
+ " with open(f'./data/{task_id}.json') as task_file:\n",
156
+ " task = json.load(task_file)\n",
157
+ " # parse the ratings\n",
158
+ " groundedness1, fluency1, utility1, notes1, \\\n",
159
+ " groundedness2, fluency2, utility2, notes2, \\\n",
160
+ " overall, notes = ratings\n",
161
+ " # update the ratings for answer 1\n",
162
+ " task['ratings_1']['groundedness'] = groundedness1\n",
163
+ " task['ratings_1']['fluency'] = fluency1\n",
164
+ " task['ratings_1']['utility'] = utility1\n",
165
+ " task['ratings_1']['notes'] = notes1\n",
166
+ " # update the ratings for answer 2\n",
167
+ " task['ratings_2']['groundedness'] = groundedness2\n",
168
+ " task['ratings_2']['fluency'] = fluency2\n",
169
+ " task['ratings_2']['utility'] = utility2\n",
170
+ " task['ratings_2']['notes'] = notes2\n",
171
+ " # update overall ratings\n",
172
+ " task['overall'] = overall\n",
173
+ " task['notes'] = notes\n",
174
+ " # save the task to json file\n",
175
+ " try:\n",
176
+ " with open(f'./data/{task_id}.json', 'w', encoding='utf-8') as task_file:\n",
177
+ " json.dump(task, task_file, ensure_ascii=False, indent=4)\n",
178
+ " gr.Info(f'Task {task_id} is saved!')\n",
179
+ " except:\n",
180
+ " raise gr.Error(f'Could not save the task {task_id} :(')\n",
181
+ "\n",
182
+ " def launch_interface(self):\n",
183
+ " \"\"\"Launch the A/B Evaluation RAG task interface.\"\"\"\n",
184
+ " gr.close_all()\n",
185
+ " self.interface.queue(default_concurrency_limit=None)\n",
186
+ " self.interface.launch()\n",
187
+ "\n",
188
+ "\n",
189
+ "rag = RAGInterface()"
190
+ ]
191
+ },
192
+ {
193
+ "cell_type": "code",
194
+ "execution_count": null,
195
+ "id": "ade1097d-35ce-4f7a-a689-1b51973cbc70",
196
+ "metadata": {},
197
+ "outputs": [],
198
+ "source": []
199
+ },
200
+ {
201
+ "cell_type": "code",
202
+ "execution_count": 4,
203
+ "id": "6707866e-8f1b-4bda-9b12-0008e289ab77",
204
+ "metadata": {},
205
+ "outputs": [],
206
+ "source": [
207
+ "# create placeholder tasks\n",
208
+ "import os\n",
209
+ "import json\n",
210
+ "\n",
211
+ "os.makedirs('./data/', exist_ok=True)\n",
212
+ "for idx in range(3):\n",
213
+ " task = {\n",
214
+ " 'id': f'demo_task_{idx+1}',\n",
215
+ " 'chat_history': [['user message 1', 'bot message 1'], ['user message 2', 'bot message 2']],\n",
216
+ " 'question': 'question',\n",
217
+ " 'search_query': 'search query',\n",
218
+ " 'search_results': ['source 1', 'source 2', 'source 3'],\n",
219
+ " 'answer_1': 'answer 1',\n",
220
+ " 'answer_2': 'answer 2',\n",
221
+ " 'ratings_1': {'groundedness': 'null', 'utility': 'null', 'fluency': 'null', 'notes': ''},\n",
222
+ " 'ratings_2': {'groundedness': 'null', 'utility': 'null', 'fluency': 'null', 'notes': ''},\n",
223
+ " 'overall': 'null',\n",
224
+ " 'notes': ''\n",
225
+ " }\n",
226
+ " with open(f'./data/demo_task_{idx+1}.json', 'w', encoding='utf-8') as task_file:\n",
227
+ " json.dump(task, task_file, ensure_ascii=False, indent=4)\n"
228
+ ]
229
+ },
230
+ {
231
+ "cell_type": "code",
232
+ "execution_count": null,
233
+ "id": "d5023979-626b-4135-8805-3de1a846586e",
234
+ "metadata": {},
235
+ "outputs": [],
236
+ "source": []
237
+ }
238
+ ],
239
+ "metadata": {
240
+ "kernelspec": {
241
+ "display_name": "Python 3 (ipykernel)",
242
+ "language": "python",
243
+ "name": "python3"
244
+ },
245
+ "language_info": {
246
+ "codemirror_mode": {
247
+ "name": "ipython",
248
+ "version": 3
249
+ },
250
+ "file_extension": ".py",
251
+ "mimetype": "text/x-python",
252
+ "name": "python",
253
+ "nbconvert_exporter": "python",
254
+ "pygments_lexer": "ipython3",
255
+ "version": "3.12.1"
256
+ }
257
+ },
258
+ "nbformat": 4,
259
+ "nbformat_minor": 5
260
+ }