# scify-demo / app.py
# Author: ferraro — minor changes, and debate initialization with CoRE (commit 1f97642)
import streamlit as st
import random
import time
import hmac
import os
import json
import requests
from llm_reasoner import LLMReasoner
from prompts import templates, get_examples
from typing import Any
from string import Template
from qa_dreamer import get_questions
from multi_agent_debate import MultiAgentDebate
def safe_parse_json(model_answer):
    """Parse an LLM answer as JSON, tolerating Markdown code fences.

    LLMs frequently wrap JSON output in ```json ... ``` fences.  If direct
    parsing fails, the fences are stripped and parsing is retried once.

    Args:
        model_answer: Raw text returned by the model.

    Returns:
        The parsed JSON object, or None if the text is not valid JSON
        even after fence removal.
    """
    try:
        return json.loads(model_answer)
    except json.JSONDecodeError as e:
        # NOTE: the original used print("...: %s", e), which printed the
        # literal "%s" — print is not a logging call.
        print(f"Failed to parse JSON: {e}")
        print(f"Raw model answer is: {model_answer}")
    # Strip Markdown code fences and retry once.
    cleaned = model_answer.replace("```json", "").replace("```", "").strip()
    try:
        parsed_answer = json.loads(cleaned)
        print("Hit parsing error but cleaned json as markdown code snippet and it can be parsed now")
        return parsed_answer
    except json.JSONDecodeError as e:
        print(f"Failed to parse JSON: {e}")
        print(f"Tried to parse cleaned model answer which was: {cleaned}")
        return None
def check_password():
    """Returns `True` if the user had a correct password.

    Credentials live in environment variables: each username is an env var
    whose value is that user's password.  The result is cached in
    st.session_state["password_correct"] across reruns.
    """

    def login_form():
        """Form with widgets to collect user information"""
        with st.form("Credentials"):
            st.text_input("Username", key="username")
            st.text_input("Password", type="password", key="password")
            st.form_submit_button("Log in", on_click=password_entered)

    def password_entered():
        """Checks whether a password entered by the user is correct."""
        stored_password = os.getenv(st.session_state["username"])
        # hmac.compare_digest gives a constant-time comparison (the hmac
        # import at the top of the file exists for exactly this); the None
        # guard covers unknown usernames, for which getenv returns None.
        if stored_password is not None and hmac.compare_digest(
            stored_password, st.session_state["password"]
        ):
            st.session_state["password_correct"] = True
            del st.session_state["password"]  # Remove credentials from session
            del st.session_state["username"]
            return
        # If authentication fails
        st.session_state["password_correct"] = False

    # Return True if the username + password is validated.
    if st.session_state.get("password_correct", False):
        return True

    # Show inputs for username + password.
    login_form()
    if "password_correct" in st.session_state:
        st.error("πŸ˜• User not known or password incorrect")
    return False
def select_models():
    """Returns only when a valid option is selected from both dropdowns."""
    placeholder_choice = "Choose one..."
    retriever_choices = [placeholder_choice, "BM25 Retriever", "Off-the-shelf Retriever", "Finetuned Retriever", "No Retriever"]
    reasoner_choices = [placeholder_choice, "Claude Sonnet", "GPT-4o", "o3-mini", "Multi Agent Debate"]
    dreamer_choices = ["None", "CoRE", "CoRE-Contrastive", "QA-Decomposition"]

    # Render the three dropdowns; widget state is keyed into session_state.
    chosen_retriever = st.selectbox("Select the Retriever Model", retriever_choices, key="retriever")
    chosen_dreamer = st.selectbox("Select the DREAMER", dreamer_choices, key="dreamer")
    chosen_reasoner = st.selectbox("Select the Reasoner Model", reasoner_choices, key="reasoner")

    # Guard clause: nothing to validate until the user presses Next.
    if not st.button("Next"):
        st.info("Click 'Next' once you have made your selections.")
        return None, None, None

    # Reject the placeholder entries (the dreamer has a real default).
    if placeholder_choice in (chosen_retriever, chosen_reasoner):
        st.info("Please select both a retriever and a reasoner.")
        return None, None, None

    # Store the valid selections in session state
    st.session_state["selected_models"] = (chosen_retriever, chosen_dreamer, chosen_reasoner)
    return chosen_retriever, chosen_dreamer, chosen_reasoner
def retriever(query: str, selected_retriever: str):
    """Simulate a 'retriever' step, searching for relevant information.

    Streams a status message into the chat, calls the remote retriever
    service (unless no retriever was selected), shows the top-k documents
    in expanders, and returns the concatenated document texts.

    Args:
        query: The claim to retrieve evidence for.
        selected_retriever: One of the retriever option labels from the UI.

    Returns:
        The top-k documents joined by blank lines, or "" when retrieval
        is skipped.
    """
    # Map each UI choice to its (status message, service endpoint) pair;
    # anything else falls through to "no retriever".
    endpoints = {
        "BM25 Retriever": ("Using the BM25 retriever to search for documents related to your query...", "bm25"),
        "Off-the-shelf Retriever": ("Using the off-the-shelf retriever to fetch detailed documents relevant to your query...", "ots"),
        "Finetuned Retriever": ("Using the finetuned retriever to fetch detailed documents relevant to your query...", "ft"),
    }
    with st.chat_message("assistant"):
        placeholder = st.empty()
        text = ""
        message, retriever_endpoint = endpoints.get(
            selected_retriever,
            ("No retriever selected. Skipping document retrieval.", "None"),
        )
        for chunk in message.split():
            text += chunk + " "
            time.sleep(0.05)
            # Add a blinking cursor to simulate typing
            placeholder.markdown(text + "β–Œ")
        placeholder.markdown(text)
        if retriever_endpoint == "None":
            return ""
        headers = {
            'Content-Type': 'application/json',
        }
        json_data = {
            'claim': query,
        }
        url = "http://18.227.0.215"
        port = "8000"
        # A timeout keeps the UI from hanging forever if the service is down.
        response = requests.post(
            f'{url}:{port}/{retriever_endpoint}',
            headers=headers,
            json=json_data,
            timeout=60,
        )
        documents = response.json()["Documents"]
        k = 3
        topk_documents = documents[:k]
        corpus = '\n\n'.join(topk_documents)
        print(f"Topk documents: {topk_documents}")
        for idx, document in enumerate(topk_documents):
            try:
                # Documents are expected as "Title: ...\nabstract: ..." text;
                # split on the abstract marker to get a one-line title.
                a_idx = document.lower().index("abstract:")
                title = document[:a_idx].replace('\n', ' ')
                doc_details = document[a_idx:]
            except ValueError as e:
                # No "abstract:" marker — show the raw document verbatim.
                print(f"Document parsing error for title and other contents-\n{document}, \n\nError: {e}")
                title = "Document title"
                doc_details = document
            with st.expander(f'Rank {idx+1} - {title.replace("Title:", "")}'):
                st.write(doc_details)
    return corpus
def display_to_chat(placeholder, message):
    """Stream *message* chunk by chunk into *placeholder*, preserving whitespace.

    Splitting with a capturing group keeps the original whitespace runs
    (e.g. blank lines) as their own chunks, so the fully streamed text is
    identical to *message*.  The original appended an extra space after
    every chunk — including the whitespace chunks — which inflated the
    whitespace instead of preserving it.
    """
    import re
    text = ""
    for chunk in re.split(r'(\s+)', message):
        # Append verbatim: the split already yields the separators.
        text += chunk
        time.sleep(0.05)
        # Add a blinking cursor to simulate typing
        placeholder.markdown(text + "β–Œ")
    placeholder.markdown(text)
def reasoner(query: str, documents: list[str], selected_dreamer: str, selected_reasoner: str, llm_client: Any):
    """Simulate a 'reasoner' step, thinking about how to answer.

    Builds a prompt according to the selected DREAMER strategy, then runs
    either a single LLM call or a multi-agent debate.

    Args:
        query: The claim to verify.
        documents: Retrieved evidence (may be empty / "" for no evidence).
        selected_dreamer: "None", "CoRE", "CoRE-Contrastive", or "QA-Decomposition".
        selected_reasoner: The reasoner option label from the UI.
        llm_client: LLMReasoner instance used for inference.

    Returns:
        A (reasoning, decision) pair; both are "" when parsing fails.
    """
    with st.chat_message("assistant"):
        placeholder = st.empty()
        message = ""
        # find the appropriate template
        if selected_dreamer == 'None':
            if not documents or len(documents) == 0:
                prompt_template = Template(templates["no_evidence"])
                prompt = prompt_template.substitute(claim=query)
                print(prompt)
            else:
                prompt_template = Template(templates["with_evidence"])
                prompt = prompt_template.substitute(claim=query, corpus_text=documents)
        elif (selected_dreamer == 'CoRE' or selected_dreamer == 'CoRE-Contrastive') and selected_reasoner != "Multi Agent Debate":
            conditions = [""]
            prompt_template = Template(templates["generate_condition"])
            prompt = prompt_template.substitute(claim=query)
            prompt = get_examples() + prompt + " answer: "
            print(prompt)
            message += f"Using {selected_dreamer} to decompose and elaborate the claim..."
            display_to_chat(placeholder, message)
            placeholder = st.empty()
            llm_response = llm_client.run_inference(prompt)
            print(llm_response)
            conditions = llm_response.split('\n\n')
            print(conditions)
            # CoRE takes the first generated condition; the contrastive
            # variant takes the second.
            condition = conditions[0] if selected_dreamer == 'CoRE' else conditions[1]
            message = "To reason about the claim, CoRE is considering the potential condition: " + condition + "\n\n\n\n"
            if not documents or len(documents) == 0:
                prompt_template = Template(templates["with_condition"])
                prompt = prompt_template.substitute(claim=query, condition=condition)
            else:
                prompt_template = Template(templates["with_evidence_condition"])
                prompt = prompt_template.substitute(claim=query, corpus_text=documents, condition=condition)
        elif selected_dreamer == 'QA-Decomposition':
            message += "Decomposing into atomic questions...\n"
            display_to_chat(placeholder, message)
            placeholder = st.empty()
            questions = get_questions(query)
            message = questions + "\n\n\n\n"
            message += "Now using the question decomposition to reason and verify the claim...\n\n"
            if not documents or len(documents) == 0:
                prompt_template = Template(templates["no_evidence_questions"])
                prompt = prompt_template.substitute(claim=query, questions=questions)
                print(prompt)
            else:
                prompt_template = Template(templates["with_evidence_questions"])
                prompt = prompt_template.substitute(claim=query, corpus_text=documents, questions=questions)
        else:
            # CoRE dreamers paired with the debate reasoner land here; the
            # conditions are generated below to seed the agents instead.
            if not documents or len(documents) == 0:
                prompt_template = Template(templates["no_evidence"])
                prompt = prompt_template.substitute(claim=query)
            else:
                prompt_template = Template(templates["with_evidence"])
                prompt = prompt_template.substitute(claim=query, corpus_text=documents)
        if selected_reasoner == "Multi Agent Debate":
            initial_agent_beliefs = None
            if selected_dreamer == 'CoRE' or selected_dreamer == 'CoRE-Contrastive':
                # Generate CoRE conditions to use as the agents' initial beliefs.
                conditions = [""]
                prompt_template = Template(templates["generate_condition"])
                prompt = prompt_template.substitute(claim=query)
                prompt = get_examples() + prompt + " answer: "
                llm_response = llm_client.run_inference(prompt)
                initial_agent_beliefs = llm_response.split('\n\n')
                print(initial_agent_beliefs)
            multi_agent_debate = MultiAgentDebate(client=llm_client.client)
            initial_evidence = "" if (not documents or len(documents) == 0) else documents[0]

            def mad_printer(msg, explanation):
                # Stream each debate turn into the chat; full arguments go
                # behind an expander.
                print(msg)
                ph = st.empty()
                display_to_chat(ph, msg)
                # BUGFIX: the original condition
                # `explanation is not None or len(explanation) > 0`
                # raised TypeError (len(None)) whenever explanation was None.
                if explanation:
                    with st.expander("Agent's Argument (see more)"):
                        st.write(str(explanation))

            decision = multi_agent_debate(claim=query,
                                          doc=initial_evidence,
                                          initial_agent_responses=initial_agent_beliefs,
                                          writer=mad_printer)
            reasoning = "of the debate and discussion."
        else:
            if selected_reasoner == "Claude Sonnet":
                message += "Using Claude Sonnet to reason and verify the claim..."
            elif selected_reasoner == "GPT-4o":
                message += "Using GPT-4o to analyze and verify the claim in detail..."
            elif selected_reasoner == "o3-mini":
                message += "Using o3-mini to quickly analyze the claim..."
            print(prompt)
            llm_response = llm_client.run_inference(prompt)
            answer_dict = safe_parse_json(llm_response)
            # safe_parse_json returns None on unparseable output; fall back
            # to empty strings instead of crashing on attribute access
            # (replaces the original bare `except:`).
            if isinstance(answer_dict, dict):
                decision = answer_dict.get("decision", "")
                reasoning = answer_dict.get("reasoning", "")
            else:
                print(f"Error with parsing the returned {answer_dict}")
                decision, reasoning = "", ""
            display_to_chat(placeholder, message)
    # You could return reasoning info here.
    return reasoning, decision
def _verify_and_respond(prompt, selected_retriever, selected_dreamer, selected_reasoner, llm_client):
    """Run the retrieve -> reason -> respond pipeline for one user claim.

    Echoes the claim (plus the current model selection) into the chat,
    retrieves evidence, runs the reasoner, then streams the verdict and
    appends it to the chat history.  Shared by the auto-submit (sidebar
    button) and chat-input paths, which previously duplicated this logic.
    """
    display_message = prompt + " \n" + " \n" + f"Retriever: {selected_retriever}, Dreamer: {selected_dreamer}, Reasoner: {selected_reasoner}"
    st.session_state.messages.append({"role": "user", "content": prompt})
    st.session_state.messages.append({"role": "summary", "content": display_message})
    # Display user message in chat message container
    with st.chat_message("user"):
        st.markdown(display_message)
    retrieved_documents = retriever(prompt, selected_retriever)
    reasoning, decision = reasoner(prompt, retrieved_documents, selected_dreamer, selected_reasoner, llm_client)
    # Display assistant response in chat message container
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""
        if decision.lower() == 'support':
            assistant_response = f'The claim is CORRECT because {reasoning}'
        elif decision.lower() == 'contradict':
            assistant_response = f'The claim is INCORRECT because {reasoning}'
        else:
            assistant_response = 'Sorry, the query failed due to an issue with connecting to the LLM service.'
        # Simulate stream of response with milliseconds delay
        for chunk in assistant_response.split():
            full_response += chunk + " "
            time.sleep(0.05)
            # Add a blinking cursor to simulate typing
            message_placeholder.markdown(full_response + "β–Œ")
        message_placeholder.markdown(full_response)
    # Add assistant response to chat history
    st.session_state.messages.append({"role": "assistant", "content": full_response})


def main():
    """Streamlit entry point: auth gate, model selection, and chat loop."""
    st.header(" Scientific Claim Verification ")
    st.caption("Team UMBC-SBU-UT")
    if not check_password():
        st.stop()
    if "selected_models" not in st.session_state:
        selected_retriever, selected_dreamer, selected_reasoner = select_models()
        # If valid selections are returned, store them and reset the change flag.
        if selected_retriever is not None and selected_reasoner is not None:
            st.session_state.selected_models = (selected_retriever, selected_dreamer, selected_reasoner)
            st.rerun()
        else:
            st.stop()  # Halt further execution until valid selections are made.
    else:
        selected_retriever, selected_dreamer, selected_reasoner = st.session_state.selected_models
    # START OF AGENTIC DEMO
    column1, column2 = st.columns(2)
    column1.caption(f"Retriever Selected: {selected_retriever}")
    column1.caption(f"Dreamer Selected: {selected_dreamer}")
    column2.caption(f"Reasoner Selected: {selected_reasoner}")
    if st.button("Change Selection", key="change_selection_btn"):
        # Drop both the stored tuple and the selectbox widget state so the
        # selector renders fresh on the next run.
        st.session_state.pop("selected_models", None)
        st.session_state.pop("retriever", None)
        st.session_state.pop("dreamer", None)
        st.session_state.pop("reasoner", None)
        st.session_state.messages = [{"role": "assistant", "content": "Let's start verifying the claims here! πŸ‘‡"}]
        st.rerun()
    # Initialize chat history
    if "messages" not in st.session_state:
        st.session_state.messages = [{"role": "assistant", "content": "Let's start verifying the claims here! πŸ‘‡"}]
    # Display chat messages from history on app rerun
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])
    # Model configurations on agentic demo page
    options = {}
    options["max_tokens"] = 500
    options["temperature"] = 0.0
    if selected_reasoner == "Claude Sonnet":
        api_key = os.getenv("claude_key")
        options["model_family"] = "Anthropic"
        options["model_name"] = "claude-3-5-sonnet-20240620"
    elif selected_reasoner == "GPT-4o":
        api_key = os.getenv("openai_key")
        options["model_family"] = "OpenAI"
        options["model_name"] = "gpt-4o-2024-11-20"
    elif selected_reasoner == "o3-mini":
        api_key = os.getenv("openai_key")
        options["model_family"] = "OpenAI"
        options["model_name"] = "o3-mini-2025-01-31"
    elif selected_reasoner == "Multi Agent Debate":
        # The debate agents run on GPT-4o under the hood.
        api_key = os.getenv("openai_key")
        options["model_family"] = "OpenAI"
        options["model_name"] = "gpt-4o-2024-11-20"
    options["API_KEY"] = api_key
    llm_client = LLMReasoner(options)

    def send_preset_message(text):
        # Add user message to chat history
        st.session_state.messages.append({"role": "user", "content": text})
        # Set the auto_submit flag to process this message
        st.session_state.auto_submit = True
        st.session_state.auto_submit_text = text

    # Quick input buttons - place these above the chat input
    st.sidebar.subheader("Quick Inputs")
    # Create buttons in the sidebar
    if st.sidebar.button("Example 1 C"):
        send_preset_message("The density of cytokine receptor bearing cells has no effect on the distance over which cytokines act.")
    if st.sidebar.button("Example 2 C"):
        send_preset_message("A total of 1,000 people in the UK are asymptomatic carriers of vCJD infection.")
    if st.sidebar.button("Example 3 C"):
        send_preset_message("Sepsis related mortality has risen from 2009 to 2014.")
    if st.sidebar.button("Example 4 S"):
        send_preset_message("IL-6 signaling plays a major role in atherosclerotic cardiovascular disease.")
    if st.sidebar.button("Example 5 S"):
        send_preset_message("The severity of cardiac involvement in amyloidosis can be described by the degree of transmurality of late gadolinium enhancement in MRI.")
    if st.sidebar.button("Example 6 S"):
        send_preset_message("There was an estimated 30 million cases of pneumonia in young children worldwide in 2010.")
    if st.sidebar.button("New Assessment"):
        st.session_state.messages = [{"role": "assistant", "content": "Let's start verifying the claims here! πŸ‘‡"}]
        st.rerun()
    # Handle auto-submission if a button was clicked
    if "auto_submit" in st.session_state and st.session_state.auto_submit:
        _verify_and_respond(st.session_state.auto_submit_text, selected_retriever, selected_dreamer, selected_reasoner, llm_client)
        # Reset the auto_submit flag
        st.session_state.auto_submit = False
        # Rerun to update the UI immediately - may not be needed
        # st.rerun()
    # Accept user input
    prompt = st.chat_input("Type your claim here")
    if prompt:
        _verify_and_respond(prompt, selected_retriever, selected_dreamer, selected_reasoner, llm_client)
# Script entry point: launch the Streamlit app when run directly.
if __name__ == '__main__':
    main()