Spaces:

chukbert
/

medical-faq-chatbot

Sleeping

App Files Files Community

medical-faq-chatbot / app.py

chukbert

Update app.py

1f1886f verified 9 months ago

raw

history blame

3.7 kB

	import functools
	import os
	import pickle
	import time
	from io import StringIO

	import faiss
	import gradio as gr
	import numpy as np
	import openai
	import pandas as pd
	from huggingface_hub import hf_hub_download
	from huggingface_hub import login
	from langchain.embeddings.openai import OpenAIEmbeddings

	# Credentials are read from the environment; either value is None when the
	# corresponding variable is not set.
	openai.api_key = os.getenv("OPENAI_API_KEY")
	hf_token = os.getenv("HF_TOKEN")

	# Only authenticate when a token is configured: calling login() without a
	# token makes huggingface_hub ask for one interactively, which cannot be
	# answered in a headless deployment.
	if hf_token:
	    login(token=hf_token)

	@functools.lru_cache(maxsize=1)
	def load_embeddings_and_faiss():
	    """Fetch and load the FAISS index and the pickled question embeddings.

	    Both files come from the ``chukbert/embedding-faq-medquad`` dataset
	    repository. The result is memoised, so the files are downloaded, read and
	    unpickled once per process instead of once per call; restart the process
	    to pick up a newer revision of the dataset.

	    Returns:
	        A ``(faiss_index, question_embeddings)`` tuple: the index returned by
	        ``faiss.read_index`` and whatever object ``embeddings.pkl`` contains.
	    """
	    repo_id = "chukbert/embedding-faq-medquad"
	    embeddings_path = hf_hub_download(
	        repo_id=repo_id,
	        filename="embeddings.pkl",
	        repo_type="dataset",
	        token=hf_token,
	    )
	    faiss_index_path = hf_hub_download(
	        repo_id=repo_id,
	        filename="faiss.index",
	        repo_type="dataset",
	        token=hf_token,
	    )

	    faiss_index = faiss.read_index(faiss_index_path)

	    # SECURITY: unpickling can execute arbitrary code. This is only acceptable
	    # while the dataset repository above is fully trusted.
	    with open(embeddings_path, "rb") as f:
	        question_embeddings = pickle.load(f)

	    return faiss_index, question_embeddings

	def retrieve_answer(question, faiss_index, embedding_model, answers, log_output, threshold=0.2):
	    """Return the stored answer whose question is nearest to ``question``.

	    Args:
	        question: Text of the user's question.
	        faiss_index: Object exposing ``search(vectors, k)`` that returns a
	            ``(distances, indices)`` pair of 2-D arrays.
	        embedding_model: Object exposing ``embed_query(text)`` that returns the
	            embedding vector for ``text``.
	        answers: Sequence of answers addressed by the positions the index
	            returns.
	        log_output: Writable text stream that receives diagnostic lines.
	        threshold: Largest distance that still counts as a match.

	    Returns:
	        The matching entry of ``answers``, or the fixed "No good match ..."
	        sentence when the nearest neighbour is farther than ``threshold`` or
	        the returned position does not address an entry of ``answers``.
	    """
	    no_match = "No good match found in dataset. Using GPT-4o-mini to generate an answer."

	    # FAISS works on float32 vectors; a plain list of Python floats would
	    # otherwise become a float64 array, so cast explicitly.
	    query = np.asarray([embedding_model.embed_query(question)], dtype=np.float32)
	    distances, indices = faiss_index.search(query, k=1)

	    closest_distance = distances[0][0]
	    closest_index = int(indices[0][0])
	    # Terminate the line so later log entries do not run into this one.
	    log_output.write(f"closest_distance: {closest_distance}\n")

	    if closest_distance > threshold:
	        return no_match

	    # Guard against positions that do not address an answer (e.g. -1, which
	    # FAISS uses for "no neighbour"): a negative value would otherwise silently
	    # pick an answer from the end of the list.
	    if not 0 <= closest_index < len(answers):
	        log_output.write(f"closest_index {closest_index} is outside the answers list\n")
	        return no_match

	    return answers[closest_index]

	def ask_openai_gpt4(question):
	    """Ask the ``gpt-4o-mini`` chat model to answer a medical question.

	    Args:
	        question: The user's question, inserted verbatim into the prompt.

	    Returns:
	        The message content of the first choice in the completion.
	    """
	    prompt = f"Answer the following medical question: {question}"
	    completion = openai.chat.completions.create(
	        model="gpt-4o-mini",
	        max_tokens=150,  # the reply is capped at 150 tokens
	        messages=[{"role": "user", "content": prompt}],
	    )
	    first_choice = completion.choices[0]
	    return first_choice.message.content

	def chatbot(user_input):
	    """Answer one user question and report timing and diagnostics.

	    The question is first looked up in the FAISS index; when that yields no
	    sufficiently close match, the question is forwarded to GPT-4o-mini.

	    Reads the module-level ``answers`` list, which this file only populates
	    when it is run as a script.

	    Args:
	        user_input: The question typed by the user.

	    Returns:
	        A ``(response_text, response_time_message, log_text)`` tuple of
	        strings, one per output textbox of the interface.
	    """
	    log_output = StringIO()  # collects the diagnostics shown in the UI

	    # An empty question cannot be embedded meaningfully; answer immediately
	    # instead of spending API calls on it.
	    if not user_input or not user_input.strip():
	        log_output.write("Empty question received; skipping retrieval.\n")
	        return "Please enter a question.", "Response time: 0.0000 seconds", log_output.getvalue()

	    no_match = "No good match found in dataset. Using GPT-4o-mini to generate an answer."

	    # Only the index is needed here; the raw embeddings are not used.
	    faiss_index, _ = load_embeddings_and_faiss()
	    embedding_model = OpenAIEmbeddings(openai_api_key=openai.api_key)

	    # perf_counter is monotonic, so the interval cannot be skewed by clock
	    # adjustments.
	    start_time = time.perf_counter()

	    log_output.write("Retrieving answer from FAISS...\n")
	    response_text = retrieve_answer(user_input, faiss_index, embedding_model, answers, log_output, threshold=0.3)

	    # retrieve_answer signals "no match" by returning this exact sentence.
	    if response_text == no_match:
	        log_output.write(no_match + "\n")
	        response_text = ask_openai_gpt4(user_input)

	    response_time = time.perf_counter() - start_time
	    timing_message = f"Response time: {response_time:.4f} seconds"

	    # Log the final response time
	    log_output.write(timing_message + "\n")

	    return response_text, timing_message, log_output.getvalue()

	# Gradio UI: a single free-text question goes in; the answer, the measured
	# response time and the diagnostic log come out.
	demo = gr.Interface(
	    title="Medical Chatbot with Custom Knowledge About Medical FAQ",
	    description="A chatbot with custom knowledge using FAISS for quick responses or fallback to GPT-4o-mini when no relevant answer is found. Response time is also tracked.",
	    fn=chatbot,  # handler invoked for every submitted question
	    inputs="text",  # one plain text field
	    # One labelled textbox per element of the tuple returned by chatbot,
	    # in the same order.
	    outputs=[
	        gr.Textbox(label=caption)
	        for caption in ("Chatbot Response", "Response Time", "Logs")
	    ],
	)

	if __name__ == "__main__":
	    # Read the FAQ dataset and expose its two columns as module-level lists;
	    # chatbot() looks up `answers` by the position the FAISS index returns.
	    df = pd.read_csv("medquad.csv")
	    questions, answers = (df[column].tolist() for column in ("question", "answer"))

	    print(
	        "Loaded questions and answers. "
	        f"Number of questions: {len(questions)}, "
	        f"Number of answers: {len(answers)}"
	    )

	    # Start the Gradio server for the interface defined above.
	    demo.launch()