Spaces:

Phoenix21
/

chatbot

Sleeping

App Files Files Community

chatbot / app.py

Phoenix21

Update app.py

9afbd21 verified 6 months ago

raw

history blame

7.79 kB

	# Install the required libraries (e.g. in Colab)
	# !pip install datasets langchain langchain_community rank_bm25 smolagents litellm duckduckgo-search chardet gradio pandas nltk scikit-learn

	# Import required modules
	import os
	import getpass
	import pandas as pd
	import chardet
	import re
	from langchain.docstore.document import Document
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain_community.retrievers import BM25Retriever
	from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool , Tool ,LiteLLMModel
	import gradio as gr
	import logging
	from nltk.corpus import words
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.metrics.pairwise import cosine_similarity

	# Set up logging
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger("Daily Wellness AI Guru")

	# Make sure a GROQ API key is present in the environment.
	# A missing or empty variable is filled from a hidden interactive prompt.
	if os.environ.get('GROQ_API_KEY'):
	    print("GROQ_API_KEY is already set.")
	else:
	    os.environ['GROQ_API_KEY'] = getpass.getpass('Enter GROQ_API_KEY: ')

	# Build the vocabulary set that is_valid_input checks tokens against.
	# If reading the NLTK "words" corpus raises LookupError (presumably because
	# it has not been downloaded yet), download it and read it again.
	try:
	    _word_list = words.words()
	except LookupError:
	    import nltk
	    nltk.download('words')
	    _word_list = words.words()
	english_words = set(_word_list)
	del _word_list  # keep only the set at module level

	# Health-and-wellness topics the assistant is meant to cover.
	# NOTE(review): nothing in the visible code reads this list - confirm it is still needed.
	ALLOWED_TOPICS = [
	    "mental health", "physical health", "fitness", "nutrition",
	    "exercise", "mindfulness", "sleep", "stress management",
	    "wellness", "relaxation", "healthy lifestyle", "self-care",
	    "meditation", "diet", "hydration", "breathing techniques",
	    "yoga", "stress relief", "emotional health", "spiritual health",
	    "healthy habits",
	]

	def is_valid_input(query):
	    """
	    Check that a user question is usable before it is sent to the pipeline.

	    The question must be non-empty, at least two characters long once
	    stripped, and contain at least one token found in ``english_words``.

	    Returns:
	        A ``(is_valid, message)`` tuple. ``message`` explains the rejection,
	        or is "Valid input." when the question is accepted.
	    """
	    stripped = query.strip() if query else ""

	    if stripped == "":
	        return False, "Input cannot be empty. Please provide a meaningful question."
	    if len(stripped) < 2:
	        return False, "Input is too short. Please provide more context or details."

	    # One recognised English word is enough to accept the question.
	    tokens = re.findall(r'\b\w+\b', query.lower())
	    if any(token in english_words for token in tokens):
	        return True, "Valid input."

	    return False, "Input appears unclear. Please use valid words in your question."

	def similarity_search(query, corpus, threshold=0.2):
	    """
	    Find the corpus entry most similar to ``query`` using TF-IDF and cosine similarity.

	    A fresh ``TfidfVectorizer`` is fitted on the corpus plus the query on every
	    call, and the query vector is compared against every corpus vector.

	    Args:
	        query: The user's question.
	        corpus: Iterable of reference strings to compare against.
	        threshold: Minimum cosine similarity for a match to count.

	    Returns:
	        ``(found, best_match, score)``. ``score`` is the highest similarity
	        between the query and any corpus entry. ``best_match`` is that entry
	        when ``score >= threshold``, otherwise ``None``. An empty corpus
	        yields ``(False, None, 0.0)``.
	    """
	    corpus = list(corpus)
	    # With nothing to compare against, the similarity call and max() would
	    # both raise; report "no match" instead.
	    if not corpus:
	        return False, None, 0.0

	    vectorizer = TfidfVectorizer()
	    tfidf_matrix = vectorizer.fit_transform(corpus + [query])
	    query_vector = tfidf_matrix[-1]
	    similarities = cosine_similarity(query_vector, tfidf_matrix[:-1]).flatten()

	    # Locate the best entry once and reuse the index for both score and text.
	    most_similar_idx = similarities.argmax()
	    max_similarity = similarities[most_similar_idx]
	    if max_similarity >= threshold:
	        return True, corpus[most_similar_idx], max_similarity
	    return False, None, max_similarity

	# CSV loader for the chatbot knowledge base
	def load_csv(file_path):
	    """
	    Read a CSV file and turn each of its rows into a ``Document``.

	    The file encoding is detected with chardet from the raw bytes before the
	    file is parsed with pandas.

	    Returns:
	        ``(documents, questions)``: one ``Document`` per row (the row rendered
	        with ``to_string`` and ``file_path`` stored as its source), and the
	        non-null values of the ``Question`` column. If anything fails, the
	        error is logged and ``([], [])`` is returned.
	    """
	    try:
	        with open(file_path, 'rb') as raw_file:
	            detected = chardet.detect(raw_file.read())
	        frame = pd.read_csv(file_path, encoding=detected['encoding'])
	        question_list = frame['Question'].dropna().tolist()

	        row_documents = []
	        for _, record in frame.iterrows():
	            row_documents.append(
	                Document(
	                    page_content=record.to_string(index=False),
	                    metadata={"source": file_path},
	                )
	            )
	        logger.info(f"Loaded {len(row_documents)} documents from {file_path}")
	    except Exception as e:
	        logger.error(f"Error loading CSV file: {e}")
	        return [], []
	    return row_documents, question_list

	# Read the knowledge base; this file must be present in the runtime environment.
	# Start-up is aborted when no document could be loaded.
	file_path = "AIChatbot.csv"
	source_docs, corpus_questions = load_csv(file_path)
	if not source_docs:
	    raise ValueError(f"Failed to load documents from {file_path}. Please check the file.")

	# Cut the row documents into overlapping chunks of at most 500 characters.
	text_splitter = RecursiveCharacterTextSplitter(
	    separators=["\n\n", "\n", ".", " ", ""],
	    chunk_size=500,
	    chunk_overlap=50,
	    strip_whitespace=True,
	    add_start_index=True,
	)
	docs_processed = text_splitter.split_documents(source_docs)
	logger.info(f"Split documents into {len(docs_processed)} chunks.")

	# Define the retriever tool
	class RetrieverTool(Tool):
	    """Agent tool that returns the best BM25 match from a set of documents."""

	    name = "retriever"
	    # NOTE(review): BM25 is keyword-based ranking rather than embedding-based
	    # semantic search; the wording is kept because the agent reads it at runtime.
	    description = "Uses semantic search to retrieve the parts of chatbot documentation most relevant to the query."
	    inputs = {
	        "query": {
	            "type": "string",
	            "description": "The query to perform. Use an affirmative tone rather than a question."
	        }
	    }
	    output_type = "string"

	    def __init__(self, docs, **kwargs):
	        """Index ``docs`` with BM25; extra keyword arguments go to ``Tool``."""
	        super().__init__(**kwargs)
	        # Up to 10 hits are requested, although forward() only returns the first.
	        self.retriever = BM25Retriever.from_documents(docs, k=10)

	    def forward(self, query: str) -> str:
	        """
	        Return the stripped text of the top-ranked document for ``query``.

	        Returns "No relevant information found." when nothing is retrieved.

	        Raises:
	            TypeError: if ``query`` is not a string.
	        """
	        # Raise explicitly: an assert would be stripped when Python runs with -O.
	        if not isinstance(query, str):
	            raise TypeError("Search query must be a string.")
	        docs = self.retriever.invoke(query)
	        if not docs:
	            return "No relevant information found."
	        return docs[0].page_content.strip()

	# Retriever over the chunked CSV documents; handed to the agent as a tool.
	retriever_tool = RetrieverTool(docs_processed)

	# Web search tool: offered to the agent and also used directly by
	# gradio_interface when no stored question is similar enough to the query.
	duckduckgo_search_tool = DuckDuckGoSearchTool()

	# Instruction text that gradio_interface prepends to the user's question
	# before handing it to the agent. It sets the persona, tone and markdown
	# formatting. It is not applied on the direct web-search fallback path.
	custom_prompt = """
	You are Daily Wellness AI Guru, a friendly and knowledgeable assistant here to simplify wellness. Your goal is to provide clear, concise, and actionable answers to the user's health and wellness-related questions. Mention how Daily Wellness AI offers tailored solutions for day-to-day wellness tasks. Use a warm and friendly tone to make the user feel at ease.

	When answering:
	1. Address the user warmly with "Hello! This is Daily Wellness AI Guru."
	2. Highlight the key points in an easy-to-understand manner.
	3. Include practical examples, tips, or short guides where relevant.
	4. Format the response for clarity using markdown (e.g., numbered lists, bullet points).
	5. Reinforce how Daily Wellness AI helps simplify wellness through AI-powered solutions.
	6. End with an engaging and polite closing remark that invites further questions.
	"""

	# Build the smolagents CodeAgent around a LiteLLM-backed model
	# (model id "groq/llama3-8b-8192"; make sure it is available to your account).
	# NOTE(review): the keyword names for the step limit and verbosity may differ
	# between smolagents releases - confirm against the installed version.
	model = LiteLLMModel("groq/llama3-8b-8192")
	agent = CodeAgent(
	    model=model,
	    tools=[retriever_tool, duckduckgo_search_tool],
	    verbose=True,
	    max_iterations=4,
	)

	# Gradio interface for interacting with the RAG pipeline
	def gradio_interface(query):
	    """
	    Answer one user question; used as the Gradio callback.

	    Steps:
	      1. Validate the text with ``is_valid_input``; return its message on failure.
	      2. Compare the question with the stored CSV questions.
	      3. If one is similar enough (threshold 0.2), run the agent with
	         ``custom_prompt`` prepended; otherwise return raw web-search results
	         followed by a fixed closing line.

	    Returns:
	        The answer text, a validation message, or a generic error message
	        when any step raises.
	    """
	    try:
	        # Validate input
	        is_valid, message = is_valid_input(query)
	        if not is_valid:
	            return message

	        # Only the match flag matters here; matched text and score are unused.
	        similar, _, _ = similarity_search(query, corpus_questions, threshold=0.2)
	        if similar:
	            response = agent.run(f"{custom_prompt}\n\nQuestion: {query}")
	            # The agent's final answer is not guaranteed to be a str.
	            return str(response).strip()

	        # smolagents tools are invoked by calling them; they expose no
	        # .invoke(), so the previous call always ended in the except branch.
	        response = duckduckgo_search_tool(query)
	        return f"{str(response).strip()}\n\nRemember, Daily Wellness AI is here to simplify wellness with AI-powered solutions. Feel free to ask more questions!"
	    except Exception as e:
	        # Top-level UI boundary: log with traceback, show a friendly message.
	        logger.exception(f"Error during query processing: {e}")
	        return "An error occurred while processing your request. Please try again later."


	# Assemble the web UI: one question textbox in, one markdown answer out,
	# with gradio_interface as the callback.
	interface = gr.Interface(
	    title="Daily Wellness AI Guru Chatbot",
	    description="Ask health and wellness questions. Get actionable, friendly advice from your wellness companion.",
	    fn=gradio_interface,
	    inputs=gr.Textbox(
	        placeholder="e.g., How does box breathing help reduce anxiety?",
	        label="Enter your question",
	    ),
	    outputs=gr.Markdown(label="Answer"),
	    theme="compact",
	)

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    interface.launch(debug=True)