HEHEBOIOG committed
Commit f541f43 · verified · 1 Parent(s): 6c15522

Update app.py

Files changed (1)
1. app.py +74 -17
app.py CHANGED
@@ -7,9 +7,10 @@ from langchain_core.prompts import ChatPromptTemplate
 from langchain_groq import ChatGroq
 from langchain_community.embeddings import HuggingFaceBgeEmbeddings
 from langchain.memory import ConversationBufferMemory
-from transformers import pipeline
+from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 from sentence_transformers import SentenceTransformer
 import tavily
+import scipy.stats as stats
 
 class AdvancedRAGChatbot:
     def __init__(self,
@@ -32,6 +33,8 @@ class AdvancedRAGChatbot:
 
         # Language Model Configuration
         self.llm = self._configure_llm(llm_model, temperature)
+        self.tokenizer = AutoTokenizer.from_pretrained(llm_model)
+        self.model = AutoModelForCausalLM.from_pretrained(llm_model)
 
         # Conversation Memory
         self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
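A note on this hunk: `AutoTokenizer.from_pretrained(llm_model)` and `AutoModelForCausalLM.from_pretrained(llm_model)` resolve their argument as a Hugging Face Hub repo id, while the same `llm_model` string appears to be what `_configure_llm` hands to ChatGroq, whose model names are Groq API ids rather than Hub repos. If the two namespaces diverge, the loads above fail. A hedged sketch of one way to decouple them (the `load_scoring_model` helper, its `scoring_model` parameter, and the `gpt2` default are assumptions, not taken from this file):

from transformers import AutoModelForCausalLM, AutoTokenizer

def load_scoring_model(scoring_model: str = "gpt2"):
    # Hypothetical helper: score metrics with a small local causal LM
    # instead of reusing the Groq chat model id, which may not exist on the Hub.
    tokenizer = AutoTokenizer.from_pretrained(scoring_model)
    model = AutoModelForCausalLM.from_pretrained(scoring_model)
    return tokenizer, model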
@@ -66,13 +69,38 @@ class AdvancedRAGChatbot:
             st.error(f"Tavily Search Error: {e}")
             return []
 
+    def _calculate_perplexity(self, text: str) -> float:
+        """Calculate perplexity of the generated text"""
+        inputs = self.tokenizer(text, return_tensors="pt")
+        with torch.no_grad():
+            outputs = self.model(**inputs, labels=inputs["input_ids"])
+        loss = outputs.loss
+        return torch.exp(loss).item()
+
+    def _calculate_embedding_quality(self, query: str, context: List[str]) -> Dict[str, float]:
+        """Calculate embedding similarity and diversity"""
+        query_embedding = self.semantic_model.encode(query)
+        context_embeddings = self.semantic_model.encode(context)
+
+        # Cosine similarities
+        similarities = [np.dot(query_embedding, context_emb) / (np.linalg.norm(query_embedding) * np.linalg.norm(context_emb))
+                        for context_emb in context_embeddings]
+
+        return {
+            "mean_similarity": np.mean(similarities),
+            "similarity_variance": np.var(similarities),
+            "min_similarity": np.min(similarities),
+            "max_similarity": np.max(similarities)
+        }
+
     def process_query(self, query: str) -> Dict[str, Any]:
         """Process the user query with web search and NLP techniques"""
         # Web Search
         web_results = self._tavily_web_search(query)
 
         # Prepare context from web search
-        context = "\n\n".join([
+        context = [result.get('content', '') for result in web_results]
+        context_str = "\n\n".join([
            f"Title: {result.get('title', 'N/A')}\nContent: {result.get('content', '')}"
            for result in web_results
        ])
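The `_calculate_perplexity` helper added above is the standard exp-of-mean-loss computation: for tokens x_1..x_N, perplexity = exp(-(1/N) * sum_i log p(x_i | x_<i)), and passing `labels=inputs["input_ids"]` makes the Hugging Face causal LM return exactly that mean negative log-likelihood as `outputs.loss`. Note the method relies on `torch`, which no hunk shown here imports, so it presumably comes from an earlier import in app.py or still needs to be added. A minimal self-contained version of the same computation, with `gpt2` as an assumed stand-in scoring model:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

def perplexity(text: str) -> float:
    """exp of the mean per-token cross-entropy under the scoring model."""
    inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        # labels=input_ids makes the model compute the shifted LM loss directly
        outputs = model(**inputs, labels=inputs["input_ids"])
    return torch.exp(outputs.loss).item()

print(perplexity("The quick brown fox jumps over the lazy dog."))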
@@ -93,7 +121,7 @@ class AdvancedRAGChatbot:
         Use the following web search results to answer the question precisely:
 
         Web Search Context:
-        {context}
+        {context_str}
 
         Question: {query}
 
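The `_calculate_embedding_quality` helper two hunks up computes the query-to-passage cosine similarities one pair at a time with numpy; sentence-transformers can produce the same statistics in a single vectorized call. A rough equivalent sketch, assuming `all-MiniLM-L6-v2` as a stand-in for `self.semantic_model` (the model name is not taken from this file):

import numpy as np
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")

def embedding_quality(query: str, passages: list) -> dict:
    """Summary statistics over query-passage cosine similarities."""
    query_emb = model.encode(query, convert_to_tensor=True)
    passage_embs = model.encode(passages, convert_to_tensor=True)
    # util.cos_sim returns a (1, n) matrix; flatten it to a vector
    sims = util.cos_sim(query_emb, passage_embs).squeeze(0).cpu().numpy()
    return {
        "mean_similarity": float(np.mean(sims)),
        "similarity_variance": float(np.var(sims)),
        "min_similarity": float(np.min(sims)),
        "max_similarity": float(np.max(sims)),
    }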
@@ -103,12 +131,18 @@ class AdvancedRAGChatbot:
         # Generate Response
         response = self.llm.invoke(full_prompt)
 
+        # Calculate additional metrics
+        perplexity = self._calculate_perplexity(response.content)
+        embedding_metrics = self._calculate_embedding_quality(query, context)
+
         return {
             "response": response.content,
             "web_sources": web_results,
             "semantic_similarity": semantic_score.tolist(),
             "sentiment": sentiment_result,
-            "named_entities": entities
+            "named_entities": entities,
+            "perplexity": perplexity,
+            "embedding_metrics": embedding_metrics
         }
 
 def main():
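One edge case in the updated `process_query`: `_tavily_web_search` returns `[]` on error, in which case `context` is an empty list and `_calculate_embedding_quality` ends up taking `np.mean` over an empty sequence, which yields `nan` with a RuntimeWarning. A hedged guard, where `safe_embedding_metrics` is a hypothetical wrapper, not part of the commit:

from typing import Dict, List

def safe_embedding_metrics(chatbot, query: str, context: List[str]) -> Dict[str, float]:
    # Fall back to zeros when the web search produced no passages.
    if not context:
        return {"mean_similarity": 0.0, "similarity_variance": 0.0,
                "min_similarity": 0.0, "max_similarity": 0.0}
    return chatbot._calculate_embedding_quality(query, context)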
@@ -129,8 +163,7 @@ def main():
 
     # Sidebar Configuration
     with st.sidebar:
-        st.header("🔧 Chatbot Settings")
-        st.markdown("Customize your AI assistant's behavior")
+        st.header("🔧 Chatbot Metrics & Settings")
 
         # Model Configuration
         embedding_model = st.selectbox(
@@ -139,9 +172,12 @@ def main():
         )
         temperature = st.slider("Creativity Level", 0.0, 1.0, 0.7, help="Higher values make responses more creative")
 
-        # Additional Controls
+        # Metrics Section
         st.divider()
-        st.info("Powered by Tavily Web Search")
+        st.subheader("🧮 Performance Metrics")
+
+        # Placeholders for metrics
+        metrics_container = st.container()
 
     # Initialize Chatbot
     chatbot = AdvancedRAGChatbot(
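`st.container()` works here as a placeholder: the container is created while the sidebar is laid out, but anything written later inside `with metrics_container:` still renders in that reserved sidebar slot, which is what lets the per-query metrics in the next hunk update the sidebar after the fact. A minimal standalone illustration of the pattern (labels and values are made up):

import streamlit as st

with st.sidebar:
    st.subheader("Performance Metrics")
    slot = st.container()  # reserved now, filled after the query runs

# ... later in the script ...
with slot:
    st.metric(label="Example metric", value="0.97")  # renders inside the sidebar slot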
@@ -169,19 +205,40 @@ def main():
             try:
                 response = chatbot.process_query(user_input)
 
+                # Update Sidebar Metrics
+                with metrics_container:
+                    # Semantic Similarity Metrics
+                    st.metric(
+                        label="🔍 Semantic Similarity Score",
+                        value=f"{np.mean(response['semantic_similarity']):.4f}",
+                        help="Measures how well the query matches semantic context"
+                    )
+
+                    # Embedding Quality Metrics
+                    st.metric(
+                        label="📊 Mean Embedding Similarity",
+                        value=f"{response['embedding_metrics']['mean_similarity']:.4f}",
+                        delta=f"Variance: {response['embedding_metrics']['similarity_variance']:.4f}"
+                    )
+
+                    # Perplexity Metric
+                    st.metric(
+                        label="🧩 Response Perplexity",
+                        value=f"{response['perplexity']:.2f}",
+                        help="Lower values indicate more predictable and coherent text"
+                    )
+
+                    # Sentiment Score
+                    st.metric(
+                        label="😊 Query Sentiment",
+                        value=response['sentiment']['label'],
+                        delta=f"{response['sentiment']['score']:.2%}"
+                    )
+
                 # Bot Response
                 st.markdown("#### AI's Answer")
                 st.write(response['response'])
 
-                # Sentiment Analysis
-                st.markdown("#### Sentiment Analysis")
-                sentiment = response['sentiment']
-                st.metric(
-                    label="Sentiment",
-                    value=sentiment['label'],
-                    delta=f"{sentiment['score']:.2%}"
-                )
-
                 # Named Entities
                 st.markdown("#### Detected Entities")
                 if response['named_entities']:
 