"""Gradio demo: GenAI counselling suggestions with a credibility check.

At import time this script loads the ``nbertagnolli/counsel-chat`` dataset,
builds the topic list and few-shot examples passed to the GenAI model, creates
the helper objects and launches a Gradio interface around ``get_output``.
"""

import copy
from typing import Optional, Tuple

import gradio as gr
import pandas as pd
from datasets import load_dataset

from src.genai import GenAI
from src.semantic_searcher import SemanticSearcher
from src.upvote_predictor import UpvotePredictor

# Load the dataset
dataset_counsel_chat = load_dataset("nbertagnolli/counsel-chat")
df_counsel_chat = pd.DataFrame(dataset_counsel_chat["train"])

# Independent copy of the columns needed for topic work, so the edits below
# never touch ``df_counsel_chat``.
df_counsel_chat_topic = copy.deepcopy(
    df_counsel_chat[
        ["questionID", "questionTitle", "questionText", "answerText", "topic"]
    ]
)

# Title and body merged into the single text block used in the prompt examples.
df_counsel_chat_topic["questionCombined"] = df_counsel_chat_topic.apply(
    lambda row: (
        f"QUESTION_TITLE: {row['questionTitle']}\n"
        f"QUESTION_CONTEXT: {row['questionText']}"
    ),
    axis=1,
)

# Keep one row per question (drop_duplicates keeps the first occurrence).
df_counsel_chat_topic = df_counsel_chat_topic.drop_duplicates(
    subset="questionID"
).reset_index(drop=True)

# Numbered, alphabetically sorted list of unique topics, one per line.
unique_topics = "\n".join(
    f"{position}. {topic}"
    for position, topic in enumerate(
        sorted(df_counsel_chat_topic["topic"].unique().tolist()), start=1
    )
)

# Few-shot examples: one randomly sampled question/answer pair per topic.
# NOTE: ``sample`` is unseeded, so the examples differ between runs.
few_examples = (
    df_counsel_chat_topic.groupby("topic", as_index=False)[
        ["questionID", "questionCombined", "answerText", "topic"]
    ]
    .apply(lambda group: group.sample(1))
    .reset_index(drop=True)
)
few_examples["examples"] = few_examples.apply(
    lambda row: (
        f"{row['questionCombined']}\n"
        f"TOPIC: {row['topic']}\n"
        f"ANSWER: {row['answerText']}"
    ),
    axis=1,
)
examples = "\n".join(
    f"\n{example}\n\n\n" for example in few_examples["examples"].to_list()
)

# Initialize the GenAI client, the upvote predictor and the semantic searcher
genai = GenAI()
upvote_predictor = UpvotePredictor("src/bert_model")
ss = SemanticSearcher(df_counsel_chat_topic, df_counsel_chat)


def get_output(
    question: str, question_context: Optional[str] = None
) -> Tuple[str, str, str, pd.DataFrame]:
    """Generate a suggestion for a question and judge its credibility.

    Args:
        question: The user's question.
        question_context: Optional additional context for the question.

    Returns:
        A 4-tuple ``(answer, topic, upvote_prediction, similar_qna)`` matching
        the four output components of the interface below. ``similar_qna`` is
        whatever ``ss.retrieve_relevant_qna`` returns when the prediction text
        contains "not", and an empty DataFrame otherwise.
    """
    # NOTE(review): answer and topic are assumed to be strings because they
    # are shown in Textbox components - confirm against GenAI.generate_content.
    answer, topic = genai.generate_content(
        question, question_context, unique_topics, examples
    )
    upvote_prediction = upvote_predictor.get_upvote_prediction(
        question, answer, question_context
    )

    # NOTE(review): any prediction text containing the substring "not" takes
    # the fallback branch - confirm this matches the labels UpvotePredictor
    # actually emits.
    if "not" in upvote_prediction.lower():
        similar_qna = ss.retrieve_relevant_qna(question, question_context)
    else:
        similar_qna = pd.DataFrame()

    return answer, topic, upvote_prediction, similar_qna


demo = gr.Interface(
    fn=get_output,
    inputs=[
        gr.Textbox(label="Input Question"),
        gr.Textbox(label="(Optional) Additional Context for Question"),
    ],
    outputs=[
        gr.Textbox(label="GenAI based suggestion"),
        gr.Textbox(label="Suggested Topic of Question"),
        gr.Textbox(label="Is GenAI based suggestion credible?"),
        gr.Dataframe(
            label=(
                "Semantically similar questions (and other metadata) to input question."
                " Will be available if GenAI based suggestion is not credible."
            )
        ),
    ],
)

demo.launch(debug=True)

# Sample input for manual testing:
# input_question_context = "I'm going through some things with my feelings and
#     myself. I barely sleep and I do nothing but think about how I'm worthless
#     and how I shouldn't be here. I've never tried or contemplated suicide.
#     I've always wanted to fix my issues, but I never get around to it. How
#     can I change my feeling of being worthless to everyone?"
# input_question = "How can I change my feeling of being worthless to everyone?"