Spaces:

Phoenix21
/

chatbot

Sleeping

App Files Files Community

Phoenix21 committed on Jan 2

Commit

5ec7b71

verified ·

1 Parent(s): 6458905

Create app.py

Browse files

Files changed (1) hide show

app.py +216 -0

app.py ADDED Viewed

	@@ -0,0 +1,216 @@

+# Install necessary libraries in Colab
+# !pip install datasets langchain langchain_community smolagents chardet gradio pandas nltk scikit-learn rank_bm25 litellm
+# Import required modules
+import os
+import getpass
+import pandas as pd
+import chardet
+import re
+from langchain.docstore.document import Document
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.retrievers import BM25Retriever
+# from smolagents import Tool, HfApiModel, CodeAgent
+from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool, ManagedAgent
+from smolagents.agents import ToolCallingAgent
+from smolagents import Tool, HfApiModel, TransformersModel, LiteLLMModel
+from typing import Optional
+import gradio as gr
+import logging
+from nltk.corpus import words
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+# Make sure a Groq API key is present in the environment; prompt for one
+# (without echoing it) when the variable is missing or empty.
+if os.environ.get('GROQ_API_KEY'):
+    print("GROQ_API_KEY is already set.")
+else:
+    os.environ['GROQ_API_KEY'] = getpass.getpass('Enter GROQ_API_KEY: ')
+# Set up logging: INFO level via basicConfig, plus the module-level logger
+# used by the functions below.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Load NLTK word list for valid word checks.
+# The list is stored as a set; is_valid_input() tests query tokens for membership in it.
+try:
+    english_words = set(words.words())
+except LookupError:
+    # The "words" corpus is not available locally: download it, then retry the load.
+    import nltk
+    nltk.download('words')
+    english_words = set(words.words())
+# Health and wellness topics the assistant is meant to cover.
+# Entries keep their original order; several are placed per line for compactness.
+ALLOWED_TOPICS = [
+    "mental health", "physical health", "fitness",
+    "nutrition", "exercise", "mindfulness",
+    "sleep", "stress management", "wellness",
+    "relaxation", "healthy lifestyle", "self-care",
+    "meditation", "diet", "hydration",
+    "breathing techniques", "yoga", "stress relief",
+    "emotional health", "spiritual health", "healthy habits",
+]
+def is_valid_input(query):
+    """
+    Check whether a user question is usable before it is sent further.
+
+    Returns a ``(is_valid, message)`` tuple. The input is rejected when it is
+    empty or whitespace-only, when it is shorter than two characters after
+    stripping, or when none of its word tokens appears in ``english_words``.
+    """
+    stripped = query.strip() if query else ""
+    if not stripped:
+        return False, "Input cannot be empty. Please provide a meaningful question."
+    if len(stripped) < 2:
+        return False, "Input is too short. Please provide more context or details."
+    # Tokenise the lower-cased text and require at least one known English word.
+    tokens = re.findall(r'\b\w+\b', query.lower())
+    if not any(token in english_words for token in tokens):
+        return False, "Input appears unclear. Please use valid words in your question."
+    return True, "Valid input."
+def similarity_search(query, corpus, threshold=0.2):
+    """
+    Find the corpus entry most similar to the query using TF-IDF and cosine similarity.
+
+    Args:
+        query: The user's question.
+        corpus: List of reference strings to compare against.
+        threshold: Minimum cosine similarity for a match to be accepted.
+
+    Returns:
+        ``(True, best_match, score)`` when the best score reaches ``threshold``,
+        otherwise ``(False, None, score)``. An empty corpus yields
+        ``(False, None, 0.0)``.
+    """
+    # With nothing to compare against there is no row to score; report "no match"
+    # instead of letting the vectorizer / max() raise.
+    if not corpus:
+        return False, None, 0.0
+    vectorizer = TfidfVectorizer()
+    # Fit on the corpus plus the query so both share one vocabulary; the query is the last row.
+    tfidf_matrix = vectorizer.fit_transform(corpus + [query])
+    query_vector = tfidf_matrix[-1]
+    similarities = cosine_similarity(query_vector, tfidf_matrix[:-1]).flatten()
+    # Locate the best row once and reuse the index for both the score and the text.
+    most_similar_idx = int(similarities.argmax())
+    max_similarity = similarities[most_similar_idx]
+    if max_similarity >= threshold:
+        return True, corpus[most_similar_idx], max_similarity
+    return False, None, max_similarity
+# Load and process the AIChatbot.csv file
+def load_csv(file_path):
+    """
+    Load a CSV file into retrieval documents and a list of questions.
+
+    The file encoding is detected with chardet before pandas parses the file.
+    Every row becomes one ``Document`` (the row rendered as text, with the file
+    path stored as ``source`` metadata). The ``Question`` column, with missing
+    values dropped, is returned separately as a list of strings.
+
+    Args:
+        file_path: Path to the CSV file; it must contain a ``Question`` column.
+
+    Returns:
+        ``(documents, questions)``. On any failure the error is logged with its
+        traceback and ``([], [])`` is returned.
+    """
+    try:
+        # Detect the encoding from the raw bytes so non-UTF-8 exports still load.
+        with open(file_path, 'rb') as f:
+            result = chardet.detect(f.read())
+            encoding = result['encoding']
+        data = pd.read_csv(file_path, encoding=encoding)
+        # Cast to str: numeric-looking cells would otherwise reach the TF-IDF
+        # vectorizer as non-strings and make it fail.
+        questions = data['Question'].dropna().astype(str).tolist()
+        documents = [
+            Document(page_content=row.to_string(index=False), metadata={"source": file_path})
+            for _, row in data.iterrows()
+        ]
+        logger.info(f"Loaded {len(documents)} documents from {file_path}")
+        return documents, questions
+    except Exception as e:
+        # Best-effort loader: the caller checks for an empty result, but keep the traceback.
+        logger.exception(f"Error loading CSV file: {e}")
+        return [], []
+# Load the AIChatbot.csv file
+file_path = "AIChatbot.csv"  # Ensure this file is uploaded to your environment
+source_docs, corpus_questions = load_csv(file_path)
+# load_csv() returns empty lists when loading fails, so stop here rather than
+# starting the app with nothing to retrieve from.
+if not source_docs:
+    raise ValueError(f"Failed to load documents from {file_path}. Please check the file.")
+# Split documents into manageable chunks
+# (chunk_size=500 with an overlap of 50; the separators are listed from coarsest to finest)
+text_splitter = RecursiveCharacterTextSplitter(
+    chunk_size=500,
+    chunk_overlap=50,
+    add_start_index=True,
+    strip_whitespace=True,
+    separators=["\n\n", "\n", ".", " ", ""],
+)
+docs_processed = text_splitter.split_documents(source_docs)
+logger.info(f"Split documents into {len(docs_processed)} chunks.")
+# Define the retriever tool
+class RetrieverTool(Tool):
+    """
+    Agent tool that looks up the document chunk best matching a query.
+
+    The documents are indexed once with ``BM25Retriever`` when the tool is
+    created; each call returns the text of the top-ranked chunk only.
+    """
+    name = "retriever"
+    description = "Uses semantic search to retrieve the parts of chatbot documentation most relevant to the query."
+    inputs = {
+        "query": {
+            "type": "string",
+            "description": "The query to perform. Use an affirmative tone rather than a question."
+        }
+    }
+    output_type = "string"
+    def __init__(self, docs, k=10, **kwargs):
+        """
+        Build the retriever over ``docs``.
+
+        Args:
+            docs: Documents (already split into chunks) to index.
+            k: Number of candidates the retriever fetches per query (default 10).
+            **kwargs: Forwarded unchanged to ``Tool.__init__``.
+        """
+        super().__init__(**kwargs)
+        self.retriever = BM25Retriever.from_documents(docs, k=k)
+    def forward(self, query: str) -> str:
+        """
+        Return the content of the most relevant chunk for ``query``.
+
+        Raises:
+            TypeError: If ``query`` is not a string.
+        """
+        # Explicit check rather than `assert`, which is removed when Python runs with -O.
+        if not isinstance(query, str):
+            raise TypeError("Search query must be a string.")
+        docs = self.retriever.invoke(query)
+        if not docs:
+            return "No relevant information found."
+        # Return only the content of the most relevant document
+        return docs[0].page_content.strip()
+# Build the retriever tool over the chunked documents
+retriever_tool = RetrieverTool(docs_processed)
+# Define the improved custom prompt
+# (gradio_interface() prepends this text to every question it sends to the agent)
+custom_prompt = """
+You are a friendly and knowledgeable AI assistant for a daily wellness company. Your goal is to provide clear, concise, and actionable answers to the user's health and wellness-related questions. Use a warm, approachable tone to make the user feel at ease.
+When answering:
+1. Focus on brevity without sacrificing accuracy or helpfulness.
+2. Highlight key points in an easy-to-understand manner.
+3. Include examples, tips, or short step-by-step guides where relevant.
+4. Format lists or steps using markdown for better readability (e.g., numbered lists, bullet points).
+5. Ensure your response is self-contained, engaging, and ends with a polite closing remark.
+Answer each question in a similar concise, helpful, and friendly way.
+"""
+# Define the agent using smolagents
+# NOTE(review): the "groq/..." model id presumably relies on the GROQ_API_KEY
+# set at startup — confirm against the LiteLLM provider configuration.
+model = LiteLLMModel("groq/llama3-8b-8192")  # Ensure the model is available
+# NOTE(review): `max_iterations` and `verbose` may be named differently in other
+# smolagents releases — TODO confirm against the installed version.
+agent = CodeAgent(
+    tools=[retriever_tool], model=model, max_iterations=4, verbose=True
+)
+# Gradio interface for interacting with the RAG pipeline
+def gradio_interface(query):
+    """
+    Answer one user question for the Gradio UI.
+
+    Steps:
+        1. Validate the text with ``is_valid_input``; return its message on failure.
+        2. Require a sufficiently similar question in ``corpus_questions``
+           (TF-IDF cosine similarity >= 0.2); otherwise return a refusal message.
+        3. Run the agent on the custom prompt plus the question.
+
+    Args:
+        query: Raw text entered by the user.
+
+    Returns:
+        The text to display: a validation message, a refusal, the agent's
+        answer, or a generic error message if anything raises.
+    """
+    try:
+        is_valid, message = is_valid_input(query)
+        if not is_valid:
+            return message
+        # Perform similarity search to verify the query's viability;
+        # only the match flag is needed here.
+        similar, _, _ = similarity_search(query, corpus_questions, threshold=0.2)
+        if not similar:
+            return (
+                "I'm here to assist with health and wellness-related topics. "
+                "However, I couldn't find a closely related question in the dataset. "
+                "Please refine your query."
+            )
+        # Directly query the agent if the question is valid
+        answer = agent.run(f"{custom_prompt}\n\nQuestion: {query}")
+        # The agent's final answer is not guaranteed to be a str; convert before
+        # stripping so a non-string answer is shown instead of triggering the error path.
+        return str(answer).strip()
+    except Exception as e:
+        # UI boundary: never surface a traceback to the user, but keep it in the logs.
+        logger.exception(f"Error during query processing: {e}")
+        return "**An error occurred while processing your request. Please try again later.**"
+# Build the Gradio UI: one textbox in, Markdown out, backed by gradio_interface()
+interface = gr.Interface(
+    fn=gradio_interface,
+    inputs=gr.Textbox(label="Enter your question", placeholder="e.g., How does box breathing help reduce anxiety?"),
+    outputs=gr.Markdown(label="Answer"),
+    title="AI Chatbot for Wellness",
+    description="Ask questions based on the AIChatbot.csv file. Focus on health and wellness topics.",
+    # NOTE(review): confirm this theme name is supported by the installed Gradio version
+    theme="compact"
+)
+# Launch the app only when this file is executed as a script
+if __name__ == "__main__":
+    interface.launch(debug=True)