|
import hmac
import importlib.util
import os
import random

import firebase_admin
import streamlit as st
from firebase_admin import credentials, firestore
|
|
|
|
|
|
|
# Register the Firebase app only when none exists yet; calling
# initialize_app again is skipped on later executions of this script.
if not firebase_admin._apps:
    firebase_admin.initialize_app(
        credentials.Certificate(
            "coco-evaluation-firebase-adminsdk-p3m64-99c4ea22c1.json"
        )
    )

# Firestore client used further down to record evaluator logins.
db = firestore.client()
|
|
|
|
|
|
|
# Page title and icon for the app.
st.set_page_config(page_title="TB Chatbot Evaluation", page_icon="👋")
|
|
|
|
|
|
|
|
|
|
|
|
|
# Passcode evaluators must enter, read from the MY_PASSCODE environment
# variable (a missing variable raises KeyError here).
PASSCODE = os.environ["MY_PASSCODE"]

# New sessions start unauthenticated; an existing value is left untouched.
st.session_state.setdefault("authenticated", False)
|
|
|
|
|
|
|
def init_session_state():
    """Seed ``st.session_state`` with the defaults this script relies on.

    Keys that are already present are left alone, with one exception:
    ``"evaluation_status"`` is also reset to an empty dict whenever its
    current value is falsy.
    """
    state = st.session_state

    # Built inside the function so each call hands out fresh containers
    # instead of sharing one list/dict/set between sessions.
    defaults = {
        "authenticated": False,
        "evaluator_confirmed": None,
        "model_order": [],
        "current_index": 0,
        "models_completed": False,
        "all_evaluations": {},
        "evaluation_ids": {},
        "start_time": None,
        "evaluation_durations": {},
        "submitted_evaluations": set(),
    }
    for key, value in defaults.items():
        if key not in state:
            state[key] = value

    # Missing *or* empty/falsy status is replaced by a new empty dict.
    if "evaluation_status" not in state or not state["evaluation_status"]:
        state["evaluation_status"] = {}
|
|
|
|
|
# Make sure every session key read below exists before it is used.
init_session_state()

# Login gate: rendered only while the visitor has not authenticated.
if not st.session_state["authenticated"]:
    st.markdown(
        "<h1 style='text-align: center;'>Welcome to the TB Chatbot Evaluation</h1>",
        unsafe_allow_html=True,
    )

    # Question in the wide column, Yes/No buttons in the two narrow ones.
    col1, col2, col3 = st.columns([2, 1, 1])
    with col1:
        st.write("Are you an evaluator?")
    with col2:
        if st.button("Yes"):
            st.session_state["evaluator_confirmed"] = True
    with col3:
        if st.button("No"):
            st.session_state["evaluator_confirmed"] = False

    if st.session_state["evaluator_confirmed"]:
        # Surrounding whitespace is dropped so "alice" and "alice " do not
        # end up as two different Firestore documents.
        evaluator_id = st.text_input(
            "Enter your Evaluator ID (can be anything)"
        ).strip()
        passcode = st.text_input(
            "Enter Passcode to Access Models (password is the same)",
            type="password",
        )

        if st.button("Submit"):
            # Constant-time comparison; both sides are encoded to bytes
            # because compare_digest rejects non-ASCII str arguments.
            passcode_ok = hmac.compare_digest(
                passcode.encode("utf-8"), PASSCODE.encode("utf-8")
            )

            # The ID is used verbatim as a Firestore document ID. A "/"
            # would be interpreted as a path separator (nested document or
            # an invalid path), and ".", ".." and "__*__" are reserved.
            id_ok = (
                bool(evaluator_id)
                and "/" not in evaluator_id
                and evaluator_id not in {".", ".."}
                and not (
                    len(evaluator_id) >= 4
                    and evaluator_id.startswith("__")
                    and evaluator_id.endswith("__")
                )
            )

            if not passcode_ok:
                st.error("Incorrect passcode.")
            elif not id_ok:
                st.error(
                    "Please enter a valid Evaluator ID (non-empty, without '/')."
                )
            else:
                # Record the login, then unlock the rest of the app.
                db.collection("evaluator_ids").document(evaluator_id).set(
                    {
                        "evaluator_id": evaluator_id,
                        "timestamp": firestore.SERVER_TIMESTAMP,
                    }
                )
                st.session_state["authenticated"] = True
                st.session_state["evaluator_id"] = evaluator_id
|
|
|
|
|
|
|
# Authenticated view: pick a model page under a blinded name and run it.
# NOTE(review): indentation was lost in this copy of the file; the selectbox
# is placed inside the sidebar and the page module is executed outside it --
# confirm against the original layout.
if st.session_state["authenticated"]:
    with st.sidebar:
        # Build the blinded name -> file mapping once per session so the
        # shuffled order stays the same across reruns.
        if "page_mapping" not in st.session_state:
            PAGES_DIR = "pages"
            shuffled_files = [
                entry for entry in os.listdir(PAGES_DIR) if entry.endswith(".py")
            ]
            random.shuffle(shuffled_files)

            # "Model A", "Model B", ... assigned in shuffled order.
            st.session_state["page_mapping"] = {
                f"Model {chr(65 + position)}": os.path.join(PAGES_DIR, filename)
                for position, filename in enumerate(shuffled_files)
            }

        pages = st.session_state["page_mapping"]
        selected_generic_name = st.selectbox(
            "Navigation",
            list(pages),
            label_visibility="collapsed",
        )

    # Load the chosen page file as a module and execute it.
    selected_page_path = pages[selected_generic_name]
    page_spec = importlib.util.spec_from_file_location(
        selected_generic_name, selected_page_path
    )
    page_module = importlib.util.module_from_spec(page_spec)
    page_spec.loader.exec_module(page_module)
|
|
|
|
|
# Introductory copy describing the evaluation task and its three criteria.
# NOTE(review): indentation was lost in this copy of the file; this call is
# kept at top level, so it also renders for visitors who have not logged in.
# Confirm whether it was meant to sit inside the authenticated branch.
portal_intro = """
# Welcome to the TB Chatbot Simulation Portal

This portal allows you to interact with our TB-powered chatbot, built using OpenAI's GPT-4. Here, you can evaluate multiple chatbot configurations, each designed to foster trust, empathy, and medical accuracy in responses.

## Purpose

Your task is to assess the chatbot models by interacting with them using patient scenario-based questions. Evaluate each model based on the following principles:

### Trust
- Does the response convey confidence and reliability?
- Are the answers factually accurate and rooted in medical expertise?

### Medical Accuracy
- Are responses aligned with current medical guidelines and best practices?
- Do they avoid misinformation or potentially harmful suggestions?

### Empathy
- Does the chatbot demonstrate understanding and compassion?
- Are responses emotionally sensitive and supportive?

Your feedback will help us identify the best model to support our mission of enhancing patient communication and care through AI-driven solutions.
"""

st.markdown(portal_intro)
|
|