# Invisible RAG Pilot Demo App

In [None]:
import json
import gspread
import gradio as gr


class RAGInterface:
    """
    Set up the gradio app for loading/saving/synchronizing the mockup A/B evaluation RAG tasks.
    The app is deployed on Hugging Face spaces at https://huggingface.co/spaces/sukiboo/invisible-rag-demo

    Tasks are stored as json files named ``demo_task_<id>.json`` inside ``DATA_DIR``;
    saved ratings are additionally mirrored to a google spreadsheet on a best-effort basis.
    """

    # folder with the task json files
    DATA_DIR = './data'
    # keys of the per-answer rating dicts, in the order the interface passes the values
    RATING_KEYS = ('groundedness', 'fluency', 'utility', 'notes')
    # google spreadsheet configuration
    GSHEET_CREDENTIALS = './gsheet_service_account.json'
    GSHEET_ID = '1D2sfE9YXKtd7cKlgalo5UnuNKC-GhxlGqHVYUlkQlCY'
    GSHEET_WORKSHEET = 'demo-app'
    # task <id> lives in the worksheet row <id> + GSHEET_ROW_OFFSET
    GSHEET_ROW_OFFSET = 3
    # (worksheet column, position in the ratings tuple) pairs that are mirrored;
    # the per-answer notes (positions 3 and 7) are not written to the worksheet
    GSHEET_COLUMNS = tuple(zip('CDEFGHIJ', (0, 1, 2, 4, 5, 6, 8, 9)))

    def __init__(self):
        self.setup_interface()
        self.launch_interface()

    def setup_interface(self):
        """Configure the A/B Evaluation RAG task interface."""
        with gr.Blocks(title='Demo AB Evaluate RAG') as self.interface:

            # protected fields: id of the currently loaded task (hidden from the user)
            _task_id = gr.Textbox(label='Task ID', interactive=False, visible=False)

            # task id and load/save/reset buttons
            with gr.Row():
                task_id = gr.Textbox(container=False, placeholder='Enter a task ID: 1--11', scale=9)
                load_button = gr.Button('Load Task', scale=1)
                save_button = gr.Button('Save Task', scale=1, variant='primary')
                reset_button = gr.Button('Reset Task', scale=1, variant='stop')

            # chat history and search results
            chat = gr.Chatbot(height=700, layout='bubble', bubble_full_width=False, label='Chat History')
            sources = gr.Markdown()

            # model completions for answers 1 and 2
            with gr.Row():
                with gr.Column():
                    answer1 = gr.Textbox(label='Answer 1', max_lines=50)
                with gr.Column():
                    answer2 = gr.Textbox(label='Answer 2', max_lines=50)

            # individual ratings for answers 1 and 2
            with gr.Row():
                ratings1 = self._rating_column()
                ratings2 = self._rating_column()

            # overall rating
            overall = gr.Radio(label='Overall Rating', choices=['#1 Better', 'Equally Bad', 'Equally Good', '#2 Better'])
            notes = gr.Textbox(label='Notes', placeholder='A brief justification for the overall rating')

            # input/output fields
            answers = (answer1, answer2)
            ratings = (*ratings1, *ratings2, overall, notes)

            # button clicks
            load_button.click(self.load_task, inputs=[task_id], outputs=[_task_id, chat, sources, *answers, *ratings])
            save_button.click(self.save_task, inputs=[_task_id, *ratings], outputs=None)
            reset_button.click(self.reset_task, inputs=[_task_id], outputs=[*ratings])

    def _rating_column(self):
        """Create one column of per-answer rating widgets and return them in RATING_KEYS order."""
        with gr.Column():
            groundedness = gr.Radio(label='Groundedness', choices=['Bad', 'Good', 'Perfect'])
            fluency = gr.Radio(label='Fluency', choices=['Bad', 'Good', 'Perfect'])
            utility = gr.Radio(label='Utility', choices=['Catastrophic', 'Bad', 'Good', 'Perfect'])
            notes = gr.Textbox(label='Notes', placeholder='N/A')
        return groundedness, fluency, utility, notes

    def _task_path(self, task_id):
        """Return the json path of the task, rejecting ids that are not plain decimal numbers."""
        task_id = str(task_id)
        # the id comes from the web interface and ends up in a file path,
        # so only plain numbers are accepted (no separators, dots, etc)
        if not (task_id.isascii() and task_id.isdecimal()):
            raise gr.Error(f'Task demo_task_{task_id} is not found :(')
        return f'{self.DATA_DIR}/demo_task_{task_id}.json'

    def _write_task(self, task_id, task):
        """Write the task dict to its json file."""
        with open(self._task_path(task_id), 'w', encoding='utf-8') as task_file:
            json.dump(task, task_file, ensure_ascii=False, indent=4)

    def _open_worksheet(self):
        """Authorize with the service account and open the ratings worksheet."""
        client = gspread.service_account(self.GSHEET_CREDENTIALS)
        return client.open_by_key(self.GSHEET_ID).worksheet(self.GSHEET_WORKSHEET)

    def load_task(self, task_id):
        """Load the task and parse the info.

        Returns a tuple (id, chat, sources, answer 1, answer 2, *ratings) matching
        the outputs of the load button. Raises gr.Error if the task file is missing
        or lacks any of the expected fields.
        """
        task = self.read_task(task_id)
        try:
            loaded_id = task['id']
            # the current question and its search query are shown as the last chat turn
            chat = task['chat_history'] + [[task['question'], task['search_query']]]
            answers = [task['answer_1'], task['answer_2']]
            sources = self.load_sources(task)
            ratings = self.load_ratings(task)
        except Exception as err:
            raise gr.Error(f'Could not load the task demo_task_{task_id} :(') from err
        gr.Info(f'Task demo_task_{task_id} is loaded!')
        return (loaded_id, chat, sources, *answers, *ratings)

    def read_task(self, task_id):
        """Read the json task file; raise gr.Error if the id is invalid or the file is missing."""
        task_path = self._task_path(task_id)
        try:
            # task files are written as utf-8, so read them the same way
            with open(task_path, encoding='utf-8') as task_file:
                return json.load(task_file)
        except FileNotFoundError as err:
            raise gr.Error(f'Task demo_task_{task_id} is not found :(') from err

    def load_sources(self, task):
        """Parse the search results into a markdown string (empty if there are none)."""
        # escape '<' so that the sources are not rendered as html tags
        sources = [f'##### {idx}. {source.replace("<", chr(92) + "<")}\n'
                   for idx, source in enumerate(task['search_results'], start=1)]
        if not sources:
            return ''
        return '\n---\n'.join(['## Search Results', *sources, ''])

    def load_ratings(self, task):
        """Parse the ratings: 4 values for answer 1, 4 values for answer 2, overall rating and notes."""
        ratings1 = tuple(task['ratings_1'][key] for key in self.RATING_KEYS)
        ratings2 = tuple(task['ratings_2'][key] for key in self.RATING_KEYS)
        return (*ratings1, *ratings2, task['overall'], task['notes'])

    def save_task(self, task_id, *ratings):
        """Save the ratings into the task json file and mirror them to the google spreadsheet.

        `ratings` are the 10 values in the order returned by `load_ratings`.
        Raises gr.Error if the task cannot be read or written.
        """
        # load the original task
        task = self.read_task(task_id)
        # parse the ratings
        ratings1, ratings2, (overall, notes) = ratings[:4], ratings[4:8], ratings[8:]
        # update the ratings for each answer and the overall ratings
        task['ratings_1'].update(zip(self.RATING_KEYS, ratings1))
        task['ratings_2'].update(zip(self.RATING_KEYS, ratings2))
        task['overall'] = overall
        task['notes'] = notes
        try:
            # save the task to json file
            self._write_task(task_id, task)
            # save the task to google spreadsheet
            self.save_gsheet(task_id, ratings)
            gr.Info(f'Task demo_task_{task_id} is saved!')
        except Exception as err:
            raise gr.Error(f'Could not save the task demo_task_{task_id} :(') from err

    def reset_task(self, task_id):
        """Reset the task by erasing the ratings and operator notes.

        Returns 10 empty strings that clear the rating widgets.
        Raises gr.Error if the task cannot be read or written.
        """
        # load the original task
        task = self.read_task(task_id)
        # erase the ratings for each answer and the overall ratings
        task['ratings_1'].update(dict.fromkeys(self.RATING_KEYS, ''))
        task['ratings_2'].update(dict.fromkeys(self.RATING_KEYS, ''))
        task['overall'] = ''
        task['notes'] = ''
        try:
            # save the reset task to json file
            self._write_task(task_id, task)
            # save the reset task to google spreadsheet
            self.reset_gsheet(task_id)
            gr.Info(f'Task demo_task_{task_id} is reset!')
        except Exception as err:
            raise gr.Error(f'Could not reset the task demo_task_{task_id} :(') from err
        return ('',) * 10

    def save_gsheet(self, task_id, ratings):
        """Save the task to google spreadsheet (best effort: failures only show a warning)."""
        try:
            row = self.GSHEET_ROW_OFFSET + int(task_id)
            # update non-empty task ratings in the worksheet
            updates = [{'range': f'{column}{row}', 'values': [[f'{ratings[idx]}']]}
                       for column, idx in self.GSHEET_COLUMNS if ratings[idx]]
            # nothing to write if no rating is set yet
            if updates:
                self._open_worksheet().batch_update(updates)
        except Exception:
            gr.Warning(f'Could not save the task demo_task_{task_id} to the spreadsheet :(')

    def reset_gsheet(self, task_id):
        """Reset the task ratings in google spreadsheet (best effort: failures only show a warning)."""
        try:
            row = self.GSHEET_ROW_OFFSET + int(task_id)
            cell_range = f'C{row}:J{row}'
            sheet = self._open_worksheet()
            # update task ratings in the worksheet if there are any (clearing empty values removes formatting)
            values = sheet.get(cell_range)
            if values and values[0]:
                sheet.batch_clear([cell_range])
        except Exception:
            gr.Warning(f'Could not reset the task demo_task_{task_id} in the spreadsheet :(')

    def launch_interface(self):
        """Launch the A/B Evaluation RAG task interface."""
        gr.close_all()
        self.interface.queue(default_concurrency_limit=None)
        self.interface.launch()


# instantiate the app: the constructor builds the interface and launches it
rag = RAGInterface()

In [None]:
# create placeholder tasks
import os
import json

# the output folder must exist before any task file is written
os.makedirs('./data/', exist_ok=True)

# template of the (empty) ratings; copied for each answer so the dicts stay independent
blank_ratings = {'groundedness': '', 'utility': '', 'fluency': '', 'notes': ''}

for idx in range(1):
    # every placeholder task carries the same dummy content, only the id differs
    task = {
        'id': f'{idx}',
        'chat_history': [['user message 1', 'bot message 1'], ['user message 2', 'bot message 2']],
        'question': 'question',
        'search_query': 'search query',
        'search_results': ['source 1', 'source 2', 'source 3'],
        'answer_1': 'answer 1',
        'answer_2': 'answer 2',
        'ratings_1': dict(blank_ratings),
        'ratings_2': dict(blank_ratings),
        'overall': '',
        'notes': ''
    }
    # serialize first, then write the text to ./data/demo_task_<idx>.json
    task_json = json.dumps(task, ensure_ascii=False, indent=4)
    with open(os.path.join('./data/', f'demo_task_{idx}.json'), 'w', encoding='utf-8') as task_file:
        task_file.write(task_json)


In [None]:
# make demo tasks from the csv of the spreadsheet
# https://docs.google.com/spreadsheets/d/1kYW0cABv2C-mMmmw2Uc50mQC0MmOuoqKJQaBp7IyCho/edit#gid=1934745276
import os
import json
import pandas as pd

def collect_numbered(row, template):
    """Collect the values of consecutively numbered columns of a csv row.

    Args:
        row: a row of the csv (a pandas Series; anything with a dict-like ``.get`` works).
        template: column-name pattern with an ``{i}`` placeholder, e.g. ``'source {i}'``.

    Returns:
        The list of values of the columns i = 1, 2, ... up to (not including)
        the first column that is absent from the row or holds a missing value.
    """
    values = []
    i = 1
    while True:
        # .get returns None for an absent column, which pd.isna treats as missing
        value = row.get(template.format(i=i))
        if pd.isna(value):
            return values
        values.append(value)
        i += 1


def build_task(row, task_id):
    """Build the demo task dict from a csv row.

    Args:
        row: a row of the csv with the columns 'question', 'answer_1', 'answer_2',
            'search_1', 'search_2' and optional numbered columns
            'user message {i}', 'bot message {i}', 'source {i}'.
        task_id: the id to store in the task (saved as a string).

    Returns:
        The task dict with empty ratings.
    """
    # chat history: pair each user message with the bot message of the same number;
    # stop early if the matching bot message column does not exist
    chat_history = []
    for i, user_message in enumerate(collect_numbered(row, 'user message {i}'), start=1):
        bot_column = f'bot message {i}'
        if bot_column not in row:
            break
        chat_history.append([user_message, row[bot_column]])

    # search query: one or two queries, each on its own line
    if not pd.isna(row['search_2']):
        search_query = f'{row["search_1"]}\n{row["search_2"]}'
    else:
        search_query = f'{row["search_1"]}'

    return {
        'id': f'{task_id}',
        'chat_history': chat_history,
        'question': f'{row["question"]}',
        'search_query': search_query,
        'search_results': collect_numbered(row, 'source {i}'),
        'answer_1': f'{row["answer_1"]}',
        'answer_2': f'{row["answer_2"]}',
        'ratings_1': {'groundedness': '', 'utility': '', 'fluency': '', 'notes': ''},
        'ratings_2': {'groundedness': '', 'utility': '', 'fluency': '', 'notes': ''},
        'overall': '',
        'notes': ''
    }


# runs when the cell/script is executed directly (not when the helpers are imported)
if __name__ == '__main__':
    df = pd.read_csv('./dev.csv')

    os.makedirs('./data/', exist_ok=True)
    # tasks are numbered from 1 in the order of the csv rows
    for task_number, (_, row) in enumerate(df.iterrows(), start=1):
        task = build_task(row, task_number)
        # save the task
        with open(f'./data/demo_task_{task_number}.json', 'w', encoding='utf-8') as task_file:
            json.dump(task, task_file, ensure_ascii=False, indent=4)
