HEHEBOIOG committed on
Commit
41b26ec
·
verified ·
1 Parent(s): cef79fb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -75
app.py CHANGED
@@ -7,16 +7,15 @@ from langchain_core.prompts import ChatPromptTemplate
7
  from langchain_groq import ChatGroq
8
  from langchain_community.embeddings import HuggingFaceBgeEmbeddings
9
  from langchain.memory import ConversationBufferMemory
10
- from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
11
  from sentence_transformers import SentenceTransformer
12
  import tavily
13
- import scipy.stats as stats
14
 
15
  class AdvancedRAGChatbot:
16
  def __init__(self,
17
  tavily_api_key: str,
18
  embedding_model: str = "BAAI/bge-large-en-v1.5",
19
- llm_model: str = "llama-3.2-70b-versatile",
20
  temperature: float = 0.7):
21
  """Initialize the Advanced RAG Chatbot with Tavily web search integration"""
22
  # Set the Tavily API key as an environment variable
@@ -33,8 +32,6 @@ class AdvancedRAGChatbot:
33
 
34
  # Language Model Configuration
35
  self.llm = self._configure_llm(llm_model, temperature)
36
- self.tokenizer = AutoTokenizer.from_pretrained(llm_model)
37
- self.model = AutoModelForCausalLM.from_pretrained(llm_model)
38
 
39
  # Conversation Memory
40
  self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
@@ -69,38 +66,13 @@ class AdvancedRAGChatbot:
69
  st.error(f"Tavily Search Error: {e}")
70
  return []
71
 
72
- def _calculate_perplexity(self, text: str) -> float:
73
- """Calculate perplexity of the generated text"""
74
- inputs = self.tokenizer(text, return_tensors="pt")
75
- with torch.no_grad():
76
- outputs = self.model(**inputs, labels=inputs["input_ids"])
77
- loss = outputs.loss
78
- return torch.exp(loss).item()
79
-
80
- def _calculate_embedding_quality(self, query: str, context: List[str]) -> Dict[float, float]:
81
- """Calculate embedding similarity and diversity"""
82
- query_embedding = self.semantic_model.encode(query)
83
- context_embeddings = self.semantic_model.encode(context)
84
-
85
- # Cosine similarities
86
- similarities = [np.dot(query_embedding, context_emb) / (np.linalg.norm(query_embedding) * np.linalg.norm(context_emb))
87
- for context_emb in context_embeddings]
88
-
89
- return {
90
- "mean_similarity": np.mean(similarities),
91
- "similarity_variance": np.var(similarities),
92
- "min_similarity": np.min(similarities),
93
- "max_similarity": np.max(similarities)
94
- }
95
-
96
  def process_query(self, query: str) -> Dict[str, Any]:
97
  """Process the user query with web search and NLP techniques"""
98
  # Web Search
99
  web_results = self._tavily_web_search(query)
100
 
101
  # Prepare context from web search
102
- context = [result.get('content', '') for result in web_results]
103
- context_str = "\n\n".join([
104
  f"Title: {result.get('title', 'N/A')}\nContent: {result.get('content', '')}"
105
  for result in web_results
106
  ])
@@ -121,7 +93,7 @@ class AdvancedRAGChatbot:
121
  Use the following web search results to answer the question precisely:
122
 
123
  Web Search Context:
124
- {context_str}
125
 
126
  Question: {query}
127
 
@@ -131,18 +103,12 @@ class AdvancedRAGChatbot:
131
  # Generate Response
132
  response = self.llm.invoke(full_prompt)
133
 
134
- # Calculate additional metrics
135
- perplexity = self._calculate_perplexity(response.content)
136
- embedding_metrics = self._calculate_embedding_quality(query, context)
137
-
138
  return {
139
  "response": response.content,
140
  "web_sources": web_results,
141
  "semantic_similarity": semantic_score.tolist(),
142
  "sentiment": sentiment_result,
143
- "named_entities": entities,
144
- "perplexity": perplexity,
145
- "embedding_metrics": embedding_metrics
146
  }
147
 
148
  def main():
@@ -163,7 +129,8 @@ def main():
163
 
164
  # Sidebar Configuration
165
  with st.sidebar:
166
- st.header("🔧 Chatbot Metrics & Settings")
 
167
 
168
  # Model Configuration
169
  embedding_model = st.selectbox(
@@ -172,12 +139,9 @@ def main():
172
  )
173
  temperature = st.slider("Creativity Level", 0.0, 1.0, 0.7, help="Higher values make responses more creative")
174
 
175
- # Metrics Section
176
  st.divider()
177
- st.subheader("🧮 Performance Metrics")
178
-
179
- # Placeholders for metrics
180
- metrics_container = st.container()
181
 
182
  # Initialize Chatbot
183
  chatbot = AdvancedRAGChatbot(
@@ -205,40 +169,19 @@ def main():
205
  try:
206
  response = chatbot.process_query(user_input)
207
 
208
- # Update Sidebar Metrics
209
- with metrics_container:
210
- # Semantic Similarity Metrics
211
- st.metric(
212
- label="🔍 Semantic Similarity Score",
213
- value=f"{np.mean(response['semantic_similarity']):.4f}",
214
- help="Measures how well the query matches semantic context"
215
- )
216
-
217
- # Embedding Quality Metrics
218
- st.metric(
219
- label="📊 Mean Embedding Similarity",
220
- value=f"{response['embedding_metrics']['mean_similarity']:.4f}",
221
- delta=f"Variance: {response['embedding_metrics']['similarity_variance']:.4f}"
222
- )
223
-
224
- # Perplexity Metric
225
- st.metric(
226
- label="🧩 Response Perplexity",
227
- value=f"{response['perplexity']:.2f}",
228
- help="Lower values indicate more predictable and coherent text"
229
- )
230
-
231
- # Sentiment Score
232
- st.metric(
233
- label="😊 Query Sentiment",
234
- value=response['sentiment']['label'],
235
- delta=f"{response['sentiment']['score']:.2%}"
236
- )
237
-
238
  # Bot Response
239
  st.markdown("#### AI's Answer")
240
  st.write(response['response'])
241
 
 
 
 
 
 
 
 
 
 
242
  # Named Entities
243
  st.markdown("#### Detected Entities")
244
  if response['named_entities']:
 
7
  from langchain_groq import ChatGroq
8
  from langchain_community.embeddings import HuggingFaceBgeEmbeddings
9
  from langchain.memory import ConversationBufferMemory
10
+ from transformers import pipeline
11
  from sentence_transformers import SentenceTransformer
12
  import tavily
 
13
 
14
  class AdvancedRAGChatbot:
15
  def __init__(self,
16
  tavily_api_key: str,
17
  embedding_model: str = "BAAI/bge-large-en-v1.5",
18
+ llm_model: str = "llama-3.3-70b-versatile",
19
  temperature: float = 0.7):
20
  """Initialize the Advanced RAG Chatbot with Tavily web search integration"""
21
  # Set the Tavily API key as an environment variable
 
32
 
33
  # Language Model Configuration
34
  self.llm = self._configure_llm(llm_model, temperature)
 
 
35
 
36
  # Conversation Memory
37
  self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
 
66
  st.error(f"Tavily Search Error: {e}")
67
  return []
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  def process_query(self, query: str) -> Dict[str, Any]:
70
  """Process the user query with web search and NLP techniques"""
71
  # Web Search
72
  web_results = self._tavily_web_search(query)
73
 
74
  # Prepare context from web search
75
+ context = "\n\n".join([
 
76
  f"Title: {result.get('title', 'N/A')}\nContent: {result.get('content', '')}"
77
  for result in web_results
78
  ])
 
93
  Use the following web search results to answer the question precisely:
94
 
95
  Web Search Context:
96
+ {context}
97
 
98
  Question: {query}
99
 
 
103
  # Generate Response
104
  response = self.llm.invoke(full_prompt)
105
 
 
 
 
 
106
  return {
107
  "response": response.content,
108
  "web_sources": web_results,
109
  "semantic_similarity": semantic_score.tolist(),
110
  "sentiment": sentiment_result,
111
+ "named_entities": entities
 
 
112
  }
113
 
114
  def main():
 
129
 
130
  # Sidebar Configuration
131
  with st.sidebar:
132
+ st.header("🔧 Chatbot Settings")
133
+ st.markdown("Customize your AI assistant's behavior")
134
 
135
  # Model Configuration
136
  embedding_model = st.selectbox(
 
139
  )
140
  temperature = st.slider("Creativity Level", 0.0, 1.0, 0.7, help="Higher values make responses more creative")
141
 
142
+ # Additional Controls
143
  st.divider()
144
+ st.info("Powered by Tavily Web Search")
 
 
 
145
 
146
  # Initialize Chatbot
147
  chatbot = AdvancedRAGChatbot(
 
169
  try:
170
  response = chatbot.process_query(user_input)
171
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  # Bot Response
173
  st.markdown("#### AI's Answer")
174
  st.write(response['response'])
175
 
176
+ # Sentiment Analysis
177
+ st.markdown("#### Sentiment Analysis")
178
+ sentiment = response['sentiment']
179
+ st.metric(
180
+ label="Sentiment",
181
+ value=sentiment['label'],
182
+ delta=f"{sentiment['score']:.2%}"
183
+ )
184
+
185
  # Named Entities
186
  st.markdown("#### Detected Entities")
187
  if response['named_entities']: