# legacydemo / app.py
# gupta-amulya's picture
# Enhance SemanticSearcher integration and refine UpvotePredictor output handling
# 20df6e4
import copy
from typing import Optional

import gradio as gr
import pandas as pd
from datasets import load_dataset

from src.genai import GenAI
from src.semantic_searcher import SemanticSearcher
from src.upvote_predictor import UpvotePredictor
# Load the counsel-chat dataset and keep its "train" split as a DataFrame.
dataset_counsel_chat = load_dataset("nbertagnolli/counsel-chat")
df_counsel_chat = pd.DataFrame(dataset_counsel_chat["train"])

# Per-question table: one row per questionID (the first occurrence is kept).
# De-duplicating before building the combined column avoids formatting rows
# that would be thrown away; reset_index() returns a new frame, so adding a
# column below does not touch df_counsel_chat.
df_counsel_chat_topic = (
    df_counsel_chat[
        ["questionID", "questionTitle", "questionText", "answerText", "topic"]
    ]
    .drop_duplicates(subset="questionID")
    .reset_index(drop=True)
)

# Title and body of each question merged into a single labelled text field.
df_counsel_chat_topic["questionCombined"] = df_counsel_chat_topic.apply(
    lambda row: (
        f"QUESTION_TITLE: {row['questionTitle']}\n"
        f"QUESTION_CONTEXT: {row['questionText']}"
    ),
    axis=1,
)
# Alphabetically sorted topics rendered as a numbered, newline-separated
# string ("1. <topic>\n2. <topic>...").
unique_topics = "\n".join(
    f"{rank}. {name}"
    for rank, name in enumerate(
        sorted(df_counsel_chat_topic["topic"].unique().tolist()), start=1
    )
)
# Few-shot examples: one randomly sampled question/answer row per topic.
# No random_state is passed, so the selection is not fixed between runs.
few_examples = (
    df_counsel_chat_topic.groupby("topic", as_index=False)[
        ["questionID", "questionCombined", "answerText", "topic"]
    ]
    .apply(lambda topic_rows: topic_rows.sample(n=1))
    .reset_index(drop=True)
)

# Render every sampled row as "<question>\nTOPIC: ...\nANSWER: ...".
few_examples["examples"] = few_examples.apply(
    lambda row: "\n".join(
        (
            f"{row['questionCombined']}",
            f"TOPIC: {row['topic']}",
            f"ANSWER: {row['answerText']}",
        )
    ),
    axis=1,
)

# Wrap each rendered example in numbered start/end markers and concatenate
# them into the single string handed to the generator.
examples = "\n".join(
    [
        f"<EXAMPLE {number} start>\n{text}\n<EXAMPLE {number} end>\n\n"
        for number, text in enumerate(
            few_examples["examples"].to_list(), start=1
        )
    ]
)
# Create, once at start-up, the three helper objects that get_output uses:
#   - genai: the GenAI wrapper that produces the suggested answer and topic,
#   - upvote_predictor: an UpvotePredictor constructed with "src/bert_model",
#   - ss: a SemanticSearcher given the per-question table and the full dataset.
genai = GenAI()
upvote_predictor = UpvotePredictor("src/bert_model")
ss = SemanticSearcher(df_counsel_chat_topic, df_counsel_chat)
def get_output(question: str, question_context: Optional[str] = None) -> tuple:
    """Produce the four values shown by the Gradio interface for one question.

    Args:
        question: The question entered by the user.
        question_context: Optional additional context for the question.

    Returns:
        A 4-tuple ``(answer, topic, upvote_prediction, similar_qna)``:
        the generated answer and topic, the upvote predictor's verdict on
        that answer, and a DataFrame of relevant Q&A returned by the
        semantic searcher. The DataFrame is empty unless the verdict
        contains "not" (case-insensitive).
    """
    answer, topic = genai.generate_content(
        question, question_context, unique_topics, examples
    )
    upvote_prediction = upvote_predictor.get_upvote_prediction(
        question, answer, question_context
    )

    # NOTE(review): this is a plain substring test, so any verdict text that
    # merely contains the letters "not" also triggers the fallback search;
    # confirm against the strings UpvotePredictor actually returns.
    if "not" in upvote_prediction.lower():
        similar_qna = ss.retrieve_relevant_qna(question, question_context)
    else:
        # Nothing to show in the table when the fallback is not triggered.
        similar_qna = pd.DataFrame()

    return answer, topic, upvote_prediction, similar_qna
# Gradio UI: two text inputs are passed to get_output, whose four return
# values populate three text boxes and one table, in this order.
_question_inputs = [
    gr.Textbox(label="Input Question"),
    gr.Textbox(label="(Optional) Additional Context for Question"),
]
_similar_questions_label = (
    "Semantically similar questions (and other metadata) to input question."
    " Will be available if GenAI based suggestion is not credible."
)
_suggestion_outputs = [
    gr.Textbox(label="GenAI based suggestion"),
    gr.Textbox(label="Suggested Topic of Question"),
    gr.Textbox(label="Is GenAI based suggestion credible?"),
    gr.Dataframe(label=_similar_questions_label),
]
demo = gr.Interface(
    fn=get_output,
    inputs=_question_inputs,
    outputs=_suggestion_outputs,
)

# Start the app; debug=True is forwarded to Gradio's launch().
demo.launch(debug=True)
# Example input question:
# input_question_context = "I'm going through some things with my feelings and myself. I barely sleep and I do nothing but think about how I'm worthless and how I shouldn't be here. I've never tried or contemplated suicide. I've always wanted to fix my issues, but I never get around to it. How can I change my feeling of being worthless to everyone?"
# input_question = "How can I change my feeling of being worthless to everyone?"