Spaces:

rameshmoorthy
/

test_bot

Sleeping

App Files Community

rameshmoorthy committed on Jun 22

Commit

bae70e0

verified ·

1 Parent(s): a78a6b1

Update app.py

Browse files

Files changed (1) hide show

app.py +143 -25

app.py CHANGED Viewed

@@ -3,6 +3,10 @@ from phi.agent import Agent
 from phi.model.groq import Groq
 import os
 import logging
 # Set up logging
 logging.basicConfig(level=logging.INFO)
@@ -19,58 +23,172 @@ else:
 # Initialize PhiData Agent
 agent = Agent(
-    model=Groq(id="llama3-70b-8192", api_key=api_key),
     instructions=[
-        "You are a helpful assistant designed to answer questions on various topics.",
-        "Provide concise and accurate responses.",
-        "If you don't know the answer, say 'I don’t have enough information to answer that.'"
     ],
     markdown=True
 )
-def simple_chat_function(message, history):
-    """Chat function with PhiData agent integration"""
-    if not message.strip():
-        return "", history
-    # Generate response using PhiData agent
     try:
-        response = agent.run(message)
-        print(response)
-        response_text = response.content if hasattr(response, 'content') else "Error generating response."
     except Exception as e:
-        logger.error(f"Agent error: {e}")
-        response_text = "Sorry, there was an error processing your request."
     # Add to history
-    history.append([message, response_text])
     return "", history
 # Minimal working interface
-with gr.Blocks() as demo:
-    chatbot = gr.Chatbot()
     msg = gr.Textbox(placeholder="Type your message here...")
     clear = gr.Button("Clear")
-    msg.submit(simple_chat_function, [msg, chatbot], [msg, chatbot])
     clear.click(lambda: ([], ""), outputs=[chatbot, msg])
 if __name__ == "__main__":
-    demo.launch()
-# import gradio as gr
-# import time
 # def simple_chat_function(message, history):
-#     """Simplified chat function for testing"""
 #     if not message.strip():
 #         return "", history
-#     # Your response generation logic here
-#     response = f"You asked: {message}"  # Replace with your actual logic
 #     # Add to history
-#     history.append([message, response])
 #     return "", history

 from phi.model.groq import Groq
 import os
 import logging
+from sentence_transformers import CrossEncoder
+from backend.semantic_search import table, retriever
+import numpy as np
+from time import perf_counter
 # Set up logging
 logging.basicConfig(level=logging.INFO)
 # Initialize PhiData Agent
 agent = Agent(
+    name="Science Education Assistant",
+    role="You are a helpful science tutor for 10th-grade students",
     instructions=[
+        "You are an expert science teacher specializing in 10th-grade curriculum.",
+        "Provide clear, accurate, and age-appropriate explanations.",
+        "Use simple language and examples that students can understand.",
+        "Focus on concepts from physics, chemistry, and biology.",
+        "Structure responses with headings and bullet points when helpful.",
+        "Encourage learning and curiosity."
     ],
+    model=Groq(id="llama3-70b-8192", api_key=api_key),
     markdown=True
 )
+# Response Generation Function
+def retrieve_and_generate_response(query, cross_encoder_choice, history=None):
+    """Generate response using semantic search and LLM"""
+    top_rerank = 25
+    top_k_rank = 20
+    if not query.strip():
+        return "Please provide a valid question."
     try:
+        start_time = perf_counter()
+        # Encode query and search documents
+        query_vec = retriever.encode(query)
+        documents = table.search(query_vec, vector_column_name="vector").limit(top_rerank).to_list()
+        documents = [doc["text"] for doc in documents]
+        # Re-rank documents using cross-encoder
+        cross_encoder_model = CrossEncoder('BAAI/bge-reranker-base') if cross_encoder_choice == '(ACCURATE) BGE reranker' else CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
+        query_doc_pair = [[query, doc] for doc in documents]
+        cross_scores = cross_encoder_model.predict(query_doc_pair)
+        sim_scores_argsort = list(reversed(np.argsort(cross_scores)))
+        documents = [documents[idx] for idx in sim_scores_argsort[:top_k_rank]]
+        # Create context from top documents
+        context = "\n\n".join(documents[:10]) if documents else ""
+        context = f"Context information from educational materials:\n{context}\n\n"
+        # Add conversation history for context
+        history_context = ""
+        if history and len(history) > 0:
+            for user_msg, bot_msg in history[-2:]:  # Last 2 exchanges
+                if user_msg and bot_msg:
+                    history_context += f"Previous Q: {user_msg}\nPrevious A: {bot_msg}\n"
+        # Create full prompt
+        full_prompt = f"{history_context}{context}Question: {query}\n\nPlease answer the question using the context provided above. If the context doesn't contain relevant information, use your general knowledge about 10th-grade science topics."
+        # Generate response
+        response = agent.run(full_prompt)
+        response_text = response.content if hasattr(response, 'content') else str(response)
+        logger.info(f"Response generation took {perf_counter() - start_time:.2f} seconds")
+        return response_text
     except Exception as e:
+        logger.error(f"Error in response generation: {e}")
+        return f"Error generating response: {str(e)}"
+def simple_chat_function(message, history, cross_encoder_choice):
+    """Chat function with semantic search and retriever integration"""
+    if not message.strip():
+        return "", history
+    # Generate response using the semantic search function
+    response = retrieve_and_generate_response(message, cross_encoder_choice, history)
     # Add to history
+    history.append([message, response])
     return "", history
 # Minimal working interface
+with gr.Blocks(title="Science Chatbot") as demo:
+    # Cross-encoder selection
+    cross_encoder = gr.Radio(
+        choices=['(FAST) MiniLM-L6v2', '(ACCURATE) BGE reranker'],
+        value='(ACCURATE) BGE reranker',
+        label="Embeddings Model",
+        info="Select the model for document ranking"
+    )
+    chatbot = gr.Chatbot(label="Science Tutor Conversation")
     msg = gr.Textbox(placeholder="Type your message here...")
     clear = gr.Button("Clear")
+    msg.submit(simple_chat_function, [msg, chatbot, cross_encoder], [msg, chatbot])
     clear.click(lambda: ([], ""), outputs=[chatbot, msg])
 if __name__ == "__main__":
+    demo.launch()# import gradio as gr
+# from phi.agent import Agent
+# from phi.model.groq import Groq
+# import os
+# import logging
+# from sentence_transformers import SentenceTransformer
+# from typing import List
+# # Set up logging
+# logging.basicConfig(level=logging.INFO)
+# logger = logging.getLogger(__name__)
+# # API Key setup
+# api_key = os.getenv("GROQ_API_KEY")
+# if not api_key:
+#     gr.Warning("GROQ_API_KEY not found. Set it in 'Repository secrets'.")
+#     logger.error("GROQ_API_KEY not found.")
+#     api_key = ""  # Fallback to empty string, but this will fail without a key
+# else:
+#     os.environ["GROQ_API_KEY"] = api_key
+# # Initialize PhiData Agent
+# agent = Agent(
+#     model=Groq(model="llama3-70b-8192", api_key=api_key),
+#     instructions=[
+#         "You are a helpful assistant designed to answer questions on various topics.",
+#         "Use the provided context from retrieved documents to answer questions.",
+#         "If you don't have enough information, say 'I don’t have enough information to answer that.'"
+#     ],
+#     markdown=True
+# )
+# # Load a simple embedding model
+# embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
+# # Simulated document corpus
+# documents = [
+#     "The capital of France is Paris.",
+#     "Python is a popular programming language.",
+#     "Semantic search uses embeddings to find relevant documents.",
+#     "The Eiffel Tower is located in Paris."
+# ]
+# # Convert documents to embeddings and store them
+# document_embeddings = embedding_model.encode(documents, convert_to_tensor=True)
+# import numpy as np
+# def retrieve_documents(query: str, k: int = 2) -> List[str]:
+#     """Simple retriever using cosine similarity."""
+#     query_embedding = embedding_model.encode(query, convert_to_tensor=True)
+#     similarities = np.dot(document_embeddings, query_embedding.T).cpu().numpy()
+#     top_k_indices = similarities.argsort()[-k:][::-1]
+#     return [documents[i] for i in top_k_indices]
 # def simple_chat_function(message, history):
+#     """Chat function with semantic search and retriever integration"""
 #     if not message.strip():
 #         return "", history
+#     # Retrieve relevant documents
+#     context = retrieve_documents(message)
+#     context_text = "\n".join(context) if context else "No relevant context found."
+#     # Generate response using PhiData agent with context
+#     try:
+#         response = agent.run(f"Context: {context_text}\n\nQuestion: {message}")
+#         response_text = response.content if hasattr(response, 'content') else "Error generating response."
+#     except Exception as e:
+#         logger.error(f"Agent error: {e}")
+#         response_text = "Sorry, there was an error processing your request."
 #     # Add to history
+#     history.append([message, response_text])
 #     return "", history