# Hugging Face Spaces page header (status: Sleeping) -- scrape residue, not part of the program.
import copy
from typing import Optional, Tuple

import gradio as gr
import pandas as pd
from datasets import load_dataset

from src.genai import GenAI
from src.semantic_searcher import SemanticSearcher
from src.upvote_predictor import UpvotePredictor
# Fetch the counsel-chat dataset and turn its "train" split into a DataFrame.
dataset_counsel_chat = load_dataset("nbertagnolli/counsel-chat")
df_counsel_chat = pd.DataFrame(dataset_counsel_chat["train"])

# Question-level view: keep only the question/answer/topic columns, retain the
# first row seen for each questionID, and renumber the index from zero.
# Selecting a column list already yields a new frame, so the full dataset in
# df_counsel_chat is left untouched.
df_counsel_chat_topic = (
    df_counsel_chat[
        ["questionID", "questionTitle", "questionText", "answerText", "topic"]
    ]
    .drop_duplicates(subset="questionID")
    .reset_index(drop=True)
)

# Merge title and body into one labelled text field per question.
df_counsel_chat_topic["questionCombined"] = df_counsel_chat_topic.apply(
    lambda row: (
        f"QUESTION_TITLE: {row['questionTitle']}\n"
        f"QUESTION_CONTEXT: {row['questionText']}"
    ),
    axis=1,
)
# Alphabetically sorted distinct topics rendered as a numbered list,
# one "<n>. <topic>" entry per line, numbering from 1.
unique_topics = "\n".join(
    f"{position}. {topic_name}"
    for position, topic_name in enumerate(
        sorted(df_counsel_chat_topic["topic"].unique().tolist()), start=1
    )
)
# Few-shot examples: draw one random question from every topic.
few_examples = (
    df_counsel_chat_topic.groupby("topic", as_index=False)[
        ["questionID", "questionCombined", "answerText", "topic"]
    ]
    .apply(lambda group: group.sample(1))
    .reset_index(drop=True)
)

# Render each sampled row as "<question>\nTOPIC: ...\nANSWER: ...".
few_examples["examples"] = [
    f"{combined}\nTOPIC: {topic_name}\nANSWER: {answer_text}"
    for combined, topic_name, answer_text in zip(
        few_examples["questionCombined"],
        few_examples["topic"],
        few_examples["answerText"],
    )
]

# Wrap every rendered example in numbered start/end markers (numbering from 1)
# and concatenate them into a single text block.
examples = "\n".join(
    f"<EXAMPLE {number} start>\n{example}\n<EXAMPLE {number} end>\n\n"
    for number, example in enumerate(few_examples["examples"].to_list(), start=1)
)
# Service objects built once at module load and shared by every call to
# get_output.
# Produces the suggested answer and topic for a question.
genai = GenAI()
# Rates a generated answer; constructed from the "src/bert_model" path.
upvote_predictor = UpvotePredictor("src/bert_model")
# Looks up related Q&A; given the de-duplicated question frame and the full dataset.
ss = SemanticSearcher(df_counsel_chat_topic, df_counsel_chat)
def get_output(
    question: str, question_context: Optional[str] = None
) -> Tuple[str, str, str, pd.DataFrame]:
    """Generate a suggestion for a question and judge its credibility.

    The question (plus optional context) is sent to the generative model
    together with the numbered topic list and the few-shot examples. The
    generated answer is then scored by the upvote predictor. Semantically
    similar Q&A are retrieved only when the predictor's verdict contains
    the word "not".

    Args:
        question: The user's question.
        question_context: Optional extra context for the question.
            NOTE(review): Gradio is expected to pass an empty string rather
            than None when the optional textbox is left blank -- confirm the
            downstream components treat both the same way.

    Returns:
        A 4-tuple ``(answer, topic, upvote_prediction, related_qna)`` matching
        the four output components of the interface. ``answer`` and ``topic``
        are presumably strings (they feed text boxes); ``related_qna`` is an
        empty DataFrame when no retrieval was performed.
    """
    answer, topic = genai.generate_content(
        question, question_context, unique_topics, examples
    )
    upvote_prediction = upvote_predictor.get_upvote_prediction(
        question, answer, question_context
    )

    # A verdict containing "not" (case-insensitive) is treated as
    # "not credible"; only then is the retrieval step run.
    if "not" in upvote_prediction.lower():
        related_qna = ss.retrieve_relevant_qna(question, question_context)
    else:
        related_qna = pd.DataFrame()

    return answer, topic, upvote_prediction, related_qna
# Input widgets: the question itself plus an optional free-text context.
question_inputs = [
    gr.Textbox(label="Input Question"),
    gr.Textbox(label="(Optional) Additional Context for Question"),
]

# Output widgets, in the same order as the tuple returned by get_output.
similar_questions_label = "Semantically similar questions (and other metadata) to input question. Will be available if GenAI based suggestion is not credible."
result_outputs = [
    gr.Textbox(label="GenAI based suggestion"),
    gr.Textbox(label="Suggested Topic of Question"),
    gr.Textbox(label="Is GenAI based suggestion credible?"),
    gr.Dataframe(label=similar_questions_label),
]

# Wire get_output into a Gradio interface and start serving it in debug mode.
demo = gr.Interface(fn=get_output, inputs=question_inputs, outputs=result_outputs)
demo.launch(debug=True)
# Example input for manual testing:
# input_question_context = "I'm going through some things with my feelings and myself. I barely sleep and I do nothing but think about how I'm worthless and how I shouldn't be here. I've never tried or contemplated suicide. I've always wanted to fix my issues, but I never get around to it. How can I change my feeling of being worthless to everyone?"
# input_question = "How can I change my feeling of being worthless to everyone?"