HEHEBOIOG committed
Commit f541f43 · verified · 1 Parent(s): 6c15522

Update app.py

Files changed (1)
1. app.py +74 -17
app.py CHANGED
@@ -7,9 +7,10 @@ from langchain_core.prompts import ChatPromptTemplate
 from langchain_groq import ChatGroq
 from langchain_community.embeddings import HuggingFaceBgeEmbeddings
 from langchain.memory import ConversationBufferMemory
-from transformers import pipeline
+from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 from sentence_transformers import SentenceTransformer
 import tavily
+import scipy.stats as stats
 
 class AdvancedRAGChatbot:
     def __init__(self,
@@ -32,6 +33,8 @@ class AdvancedRAGChatbot:
 
         # Language Model Configuration
         self.llm = self._configure_llm(llm_model, temperature)
+        self.tokenizer = AutoTokenizer.from_pretrained(llm_model)
+        self.model = AutoModelForCausalLM.from_pretrained(llm_model)
 
         # Conversation Memory
         self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
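A note on this hunk: `AutoTokenizer.from_pretrained(llm_model)` and `AutoModelForCausalLM.from_pretrained(llm_model)` resolve their argument as a Hugging Face Hub repo id, while the same `llm_model` string appears to be what `_configure_llm` hands to ChatGroq, whose model names are Groq API ids rather than Hub repos. If the two namespaces diverge, the loads above fail. A hedged sketch of one way to decouple them (the `load_scoring_model` helper, its `scoring_model` parameter, and the `gpt2` default are assumptions, not taken from this file):

from transformers import AutoModelForCausalLM, AutoTokenizer

def load_scoring_model(scoring_model: str = "gpt2"):
    # Hypothetical helper: score metrics with a small local causal LM
    # instead of reusing the Groq chat model id, which may not exist on the Hub.
    tokenizer = AutoTokenizer.from_pretrained(scoring_model)
    model = AutoModelForCausalLM.from_pretrained(scoring_model)
    return tokenizer, model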
@@ -66,13 +69,38 @@ class AdvancedRAGChatbot:
             st.error(f"Tavily Search Error: {e}")
             return []
 
+    def _calculate_perplexity(self, text: str) -> float:
+        """Calculate perplexity of the generated text"""
+        inputs = self.tokenizer(text, return_tensors="pt")
+        with torch.no_grad():
+            outputs = self.model(**inputs, labels=inputs["input_ids"])
+        loss = outputs.loss
+        return torch.exp(loss).item()
+
+    def _calculate_embedding_quality(self, query: str, context: List[str]) -> Dict[str, float]:
+        """Calculate embedding similarity and diversity"""
+        query_embedding = self.semantic_model.encode(query)
+        context_embeddings = self.semantic_model.encode(context)
+
+        # Cosine similarities
+        similarities = [np.dot(query_embedding, context_emb) / (np.linalg.norm(query_embedding) * np.linalg.norm(context_emb))
+                        for context_emb in context_embeddings]
+
+        return {
+            "mean_similarity": np.mean(similarities),
+            "similarity_variance": np.var(similarities),
+            "min_similarity": np.min(similarities),
+            "max_similarity": np.max(similarities)
+        }
+
     def process_query(self, query: str) -> Dict[str, Any]:
         """Process the user query with web search and NLP techniques"""
         # Web Search
         web_results = self._tavily_web_search(query)
 
         # Prepare context from web search
-        context = "\n\n".join([
+        context = [result.get('content', '') for result in web_results]
+        context_str = "\n\n".join([
            f"Title: {result.get('title', 'N/A')}\nContent: {result.get('content', '')}"
            for result in web_results
        ])
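The `_calculate_perplexity` helper added above is the standard exp-of-mean-loss computation: for tokens x_1..x_N, perplexity = exp(-(1/N) * sum_i log p(x_i | x_<i)), and passing `labels=inputs["input_ids"]` makes the Hugging Face causal LM return exactly that mean negative log-likelihood as `outputs.loss`. Note the method relies on `torch`, which no hunk shown here imports, so it presumably comes from an earlier import in app.py or still needs to be added. A minimal self-contained version of the same computation, with `gpt2` as an assumed stand-in scoring model:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

def perplexity(text: str) -> float:
    """exp of the mean per-token cross-entropy under the scoring model."""
    inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        # labels=input_ids makes the model compute the shifted LM loss directly
        outputs = model(**inputs, labels=inputs["input_ids"])
    return torch.exp(outputs.loss).item()

print(perplexity("The quick brown fox jumps over the lazy dog."))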
@@ -93,7 +121,7 @@ class AdvancedRAGChatbot:
         Use the following web search results to answer the question precisely:
 
         Web Search Context:
-        {context}
+        {context_str}
 
         Question: {query}
 
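The `_calculate_embedding_quality` helper two hunks up computes the query-to-passage cosine similarities one pair at a time with numpy; sentence-transformers can produce the same statistics in a single vectorized call. A rough equivalent sketch, assuming `all-MiniLM-L6-v2` as a stand-in for `self.semantic_model` (the model name is not taken from this file):

import numpy as np
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")

def embedding_quality(query: str, passages: list) -> dict:
    """Summary statistics over query-passage cosine similarities."""
    query_emb = model.encode(query, convert_to_tensor=True)
    passage_embs = model.encode(passages, convert_to_tensor=True)
    # util.cos_sim returns a (1, n) matrix; flatten it to a vector
    sims = util.cos_sim(query_emb, passage_embs).squeeze(0).cpu().numpy()
    return {
        "mean_similarity": float(np.mean(sims)),
        "similarity_variance": float(np.var(sims)),
        "min_similarity": float(np.min(sims)),
        "max_similarity": float(np.max(sims)),
    }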
@@ -103,12 +131,18 @@ class AdvancedRAGChatbot:
         # Generate Response
         response = self.llm.invoke(full_prompt)
 
+        # Calculate additional metrics
+        perplexity = self._calculate_perplexity(response.content)
+        embedding_metrics = self._calculate_embedding_quality(query, context)
+
         return {
             "response": response.content,
             "web_sources": web_results,
             "semantic_similarity": semantic_score.tolist(),
             "sentiment": sentiment_result,
-            "named_entities": entities
+            "named_entities": entities,
+            "perplexity": perplexity,
+            "embedding_metrics": embedding_metrics
         }
 
 def main():
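One edge case in the updated `process_query`: `_tavily_web_search` returns `[]` on error, in which case `context` is an empty list and `_calculate_embedding_quality` ends up taking `np.mean` over an empty sequence, which yields `nan` with a RuntimeWarning. A hedged guard, where `safe_embedding_metrics` is a hypothetical wrapper, not part of the commit:

from typing import Dict, List

def safe_embedding_metrics(chatbot, query: str, context: List[str]) -> Dict[str, float]:
    # Fall back to zeros when the web search produced no passages.
    if not context:
        return {"mean_similarity": 0.0, "similarity_variance": 0.0,
                "min_similarity": 0.0, "max_similarity": 0.0}
    return chatbot._calculate_embedding_quality(query, context)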
@@ -129,8 +163,7 @@ def main():
 
     # Sidebar Configuration
     with st.sidebar:
-        st.header("🔧 Chatbot Settings")
-        st.markdown("Customize your AI assistant's behavior")
+        st.header("🔧 Chatbot Metrics & Settings")
 
         # Model Configuration
         embedding_model = st.selectbox(
@@ -139,9 +172,12 @@ def main():
         )
         temperature = st.slider("Creativity Level", 0.0, 1.0, 0.7, help="Higher values make responses more creative")
 
-        # Additional Controls
+        # Metrics Section
         st.divider()
-        st.info("Powered by Tavily Web Search")
+        st.subheader("🧮 Performance Metrics")
+
+        # Placeholders for metrics
+        metrics_container = st.container()
 
     # Initialize Chatbot
     chatbot = AdvancedRAGChatbot(
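`st.container()` works here as a placeholder: the container is created while the sidebar is laid out, but anything written later inside `with metrics_container:` still renders in that reserved sidebar slot, which is what lets the per-query metrics in the next hunk update the sidebar after the fact. A minimal standalone illustration of the pattern (labels and values are made up):

import streamlit as st

with st.sidebar:
    st.subheader("Performance Metrics")
    slot = st.container()  # reserved now, filled after the query runs

# ... later in the script ...
with slot:
    st.metric(label="Example metric", value="0.97")  # renders inside the sidebar slot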
@@ -169,19 +205,40 @@ def main():
             try:
                 response = chatbot.process_query(user_input)
 
+                # Update Sidebar Metrics
+                with metrics_container:
+                    # Semantic Similarity Metrics
+                    st.metric(
+                        label="🔍 Semantic Similarity Score",
+                        value=f"{np.mean(response['semantic_similarity']):.4f}",
+                        help="Measures how well the query matches semantic context"
+                    )
+
+                    # Embedding Quality Metrics
+                    st.metric(
+                        label="📊 Mean Embedding Similarity",
+                        value=f"{response['embedding_metrics']['mean_similarity']:.4f}",
+                        delta=f"Variance: {response['embedding_metrics']['similarity_variance']:.4f}"
+                    )
+
+                    # Perplexity Metric
+                    st.metric(
+                        label="🧩 Response Perplexity",
+                        value=f"{response['perplexity']:.2f}",
+                        help="Lower values indicate more predictable and coherent text"
+                    )
+
+                    # Sentiment Score
+                    st.metric(
+                        label="😊 Query Sentiment",
+                        value=response['sentiment']['label'],
+                        delta=f"{response['sentiment']['score']:.2%}"
+                    )
+
                 # Bot Response
                 st.markdown("#### AI's Answer")
                 st.write(response['response'])
 
-                # Sentiment Analysis
-                st.markdown("#### Sentiment Analysis")
-                sentiment = response['sentiment']
-                st.metric(
-                    label="Sentiment",
-                    value=sentiment['label'],
-                    delta=f"{sentiment['score']:.2%}"
-                )
-
                 # Named Entities
                 st.markdown("#### Detected Entities")
                 if response['named_entities']:
 