Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,16 +7,15 @@ from langchain_core.prompts import ChatPromptTemplate
|
|
7 |
from langchain_groq import ChatGroq
|
8 |
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
|
9 |
from langchain.memory import ConversationBufferMemory
|
10 |
-
from transformers import pipeline
|
11 |
from sentence_transformers import SentenceTransformer
|
12 |
import tavily
|
13 |
-
import scipy.stats as stats
|
14 |
|
15 |
class AdvancedRAGChatbot:
|
16 |
def __init__(self,
|
17 |
tavily_api_key: str,
|
18 |
embedding_model: str = "BAAI/bge-large-en-v1.5",
|
19 |
-
llm_model: str = "llama-3.
|
20 |
temperature: float = 0.7):
|
21 |
"""Initialize the Advanced RAG Chatbot with Tavily web search integration"""
|
22 |
# Set the Tavily API key as an environment variable
|
@@ -33,8 +32,6 @@ class AdvancedRAGChatbot:
|
|
33 |
|
34 |
# Language Model Configuration
|
35 |
self.llm = self._configure_llm(llm_model, temperature)
|
36 |
-
self.tokenizer = AutoTokenizer.from_pretrained(llm_model)
|
37 |
-
self.model = AutoModelForCausalLM.from_pretrained(llm_model)
|
38 |
|
39 |
# Conversation Memory
|
40 |
self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
|
@@ -69,38 +66,13 @@ class AdvancedRAGChatbot:
|
|
69 |
st.error(f"Tavily Search Error: {e}")
|
70 |
return []
|
71 |
|
72 |
-
def _calculate_perplexity(self, text: str) -> float:
|
73 |
-
"""Calculate perplexity of the generated text"""
|
74 |
-
inputs = self.tokenizer(text, return_tensors="pt")
|
75 |
-
with torch.no_grad():
|
76 |
-
outputs = self.model(**inputs, labels=inputs["input_ids"])
|
77 |
-
loss = outputs.loss
|
78 |
-
return torch.exp(loss).item()
|
79 |
-
|
80 |
-
def _calculate_embedding_quality(self, query: str, context: List[str]) -> Dict[float, float]:
|
81 |
-
"""Calculate embedding similarity and diversity"""
|
82 |
-
query_embedding = self.semantic_model.encode(query)
|
83 |
-
context_embeddings = self.semantic_model.encode(context)
|
84 |
-
|
85 |
-
# Cosine similarities
|
86 |
-
similarities = [np.dot(query_embedding, context_emb) / (np.linalg.norm(query_embedding) * np.linalg.norm(context_emb))
|
87 |
-
for context_emb in context_embeddings]
|
88 |
-
|
89 |
-
return {
|
90 |
-
"mean_similarity": np.mean(similarities),
|
91 |
-
"similarity_variance": np.var(similarities),
|
92 |
-
"min_similarity": np.min(similarities),
|
93 |
-
"max_similarity": np.max(similarities)
|
94 |
-
}
|
95 |
-
|
96 |
def process_query(self, query: str) -> Dict[str, Any]:
|
97 |
"""Process the user query with web search and NLP techniques"""
|
98 |
# Web Search
|
99 |
web_results = self._tavily_web_search(query)
|
100 |
|
101 |
# Prepare context from web search
|
102 |
-
context =
|
103 |
-
context_str = "\n\n".join([
|
104 |
f"Title: {result.get('title', 'N/A')}\nContent: {result.get('content', '')}"
|
105 |
for result in web_results
|
106 |
])
|
@@ -121,7 +93,7 @@ class AdvancedRAGChatbot:
|
|
121 |
Use the following web search results to answer the question precisely:
|
122 |
|
123 |
Web Search Context:
|
124 |
-
{context_str}
|
125 |
|
126 |
Question: {query}
|
127 |
|
@@ -131,18 +103,12 @@ class AdvancedRAGChatbot:
|
|
131 |
# Generate Response
|
132 |
response = self.llm.invoke(full_prompt)
|
133 |
|
134 |
-
# Calculate additional metrics
|
135 |
-
perplexity = self._calculate_perplexity(response.content)
|
136 |
-
embedding_metrics = self._calculate_embedding_quality(query, context)
|
137 |
-
|
138 |
return {
|
139 |
"response": response.content,
|
140 |
"web_sources": web_results,
|
141 |
"semantic_similarity": semantic_score.tolist(),
|
142 |
"sentiment": sentiment_result,
|
143 |
-
"named_entities": entities,
|
144 |
-
"perplexity": perplexity,
|
145 |
-
"embedding_metrics": embedding_metrics
|
146 |
}
|
147 |
|
148 |
def main():
|
@@ -163,7 +129,8 @@ def main():
|
|
163 |
|
164 |
# Sidebar Configuration
|
165 |
with st.sidebar:
|
166 |
-
st.header("🔧 Chatbot
|
|
|
167 |
|
168 |
# Model Configuration
|
169 |
embedding_model = st.selectbox(
|
@@ -172,12 +139,9 @@ def main():
|
|
172 |
)
|
173 |
temperature = st.slider("Creativity Level", 0.0, 1.0, 0.7, help="Higher values make responses more creative")
|
174 |
|
175 |
-
#
|
176 |
st.divider()
|
177 |
-
st.
|
178 |
-
|
179 |
-
# Placeholders for metrics
|
180 |
-
metrics_container = st.container()
|
181 |
|
182 |
# Initialize Chatbot
|
183 |
chatbot = AdvancedRAGChatbot(
|
@@ -205,40 +169,19 @@ def main():
|
|
205 |
try:
|
206 |
response = chatbot.process_query(user_input)
|
207 |
|
208 |
-
# Update Sidebar Metrics
|
209 |
-
with metrics_container:
|
210 |
-
# Semantic Similarity Metrics
|
211 |
-
st.metric(
|
212 |
-
label="🔍 Semantic Similarity Score",
|
213 |
-
value=f"{np.mean(response['semantic_similarity']):.4f}",
|
214 |
-
help="Measures how well the query matches semantic context"
|
215 |
-
)
|
216 |
-
|
217 |
-
# Embedding Quality Metrics
|
218 |
-
st.metric(
|
219 |
-
label="📊 Mean Embedding Similarity",
|
220 |
-
value=f"{response['embedding_metrics']['mean_similarity']:.4f}",
|
221 |
-
delta=f"Variance: {response['embedding_metrics']['similarity_variance']:.4f}"
|
222 |
-
)
|
223 |
-
|
224 |
-
# Perplexity Metric
|
225 |
-
st.metric(
|
226 |
-
label="🧩 Response Perplexity",
|
227 |
-
value=f"{response['perplexity']:.2f}",
|
228 |
-
help="Lower values indicate more predictable and coherent text"
|
229 |
-
)
|
230 |
-
|
231 |
-
# Sentiment Score
|
232 |
-
st.metric(
|
233 |
-
label="😊 Query Sentiment",
|
234 |
-
value=response['sentiment']['label'],
|
235 |
-
delta=f"{response['sentiment']['score']:.2%}"
|
236 |
-
)
|
237 |
-
|
238 |
# Bot Response
|
239 |
st.markdown("#### AI's Answer")
|
240 |
st.write(response['response'])
|
241 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
242 |
# Named Entities
|
243 |
st.markdown("#### Detected Entities")
|
244 |
if response['named_entities']:
|
|
|
7 |
from langchain_groq import ChatGroq
|
8 |
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
|
9 |
from langchain.memory import ConversationBufferMemory
|
10 |
+
from transformers import pipeline
|
11 |
from sentence_transformers import SentenceTransformer
|
12 |
import tavily
|
|
|
13 |
|
14 |
class AdvancedRAGChatbot:
|
15 |
def __init__(self,
|
16 |
tavily_api_key: str,
|
17 |
embedding_model: str = "BAAI/bge-large-en-v1.5",
|
18 |
+
llm_model: str = "llama-3.3-70b-versatile",
|
19 |
temperature: float = 0.7):
|
20 |
"""Initialize the Advanced RAG Chatbot with Tavily web search integration"""
|
21 |
# Set the Tavily API key as an environment variable
|
|
|
32 |
|
33 |
# Language Model Configuration
|
34 |
self.llm = self._configure_llm(llm_model, temperature)
|
|
|
|
|
35 |
|
36 |
# Conversation Memory
|
37 |
self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
|
|
|
66 |
st.error(f"Tavily Search Error: {e}")
|
67 |
return []
|
68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
def process_query(self, query: str) -> Dict[str, Any]:
|
70 |
"""Process the user query with web search and NLP techniques"""
|
71 |
# Web Search
|
72 |
web_results = self._tavily_web_search(query)
|
73 |
|
74 |
# Prepare context from web search
|
75 |
+
context = "\n\n".join([
|
|
|
76 |
f"Title: {result.get('title', 'N/A')}\nContent: {result.get('content', '')}"
|
77 |
for result in web_results
|
78 |
])
|
|
|
93 |
Use the following web search results to answer the question precisely:
|
94 |
|
95 |
Web Search Context:
|
96 |
+
{context}
|
97 |
|
98 |
Question: {query}
|
99 |
|
|
|
103 |
# Generate Response
|
104 |
response = self.llm.invoke(full_prompt)
|
105 |
|
|
|
|
|
|
|
|
|
106 |
return {
|
107 |
"response": response.content,
|
108 |
"web_sources": web_results,
|
109 |
"semantic_similarity": semantic_score.tolist(),
|
110 |
"sentiment": sentiment_result,
|
111 |
+
"named_entities": entities
|
|
|
|
|
112 |
}
|
113 |
|
114 |
def main():
|
|
|
129 |
|
130 |
# Sidebar Configuration
|
131 |
with st.sidebar:
|
132 |
+
st.header("🔧 Chatbot Settings")
|
133 |
+
st.markdown("Customize your AI assistant's behavior")
|
134 |
|
135 |
# Model Configuration
|
136 |
embedding_model = st.selectbox(
|
|
|
139 |
)
|
140 |
temperature = st.slider("Creativity Level", 0.0, 1.0, 0.7, help="Higher values make responses more creative")
|
141 |
|
142 |
+
# Additional Controls
|
143 |
st.divider()
|
144 |
+
st.info("Powered by Tavily Web Search")
|
|
|
|
|
|
|
145 |
|
146 |
# Initialize Chatbot
|
147 |
chatbot = AdvancedRAGChatbot(
|
|
|
169 |
try:
|
170 |
response = chatbot.process_query(user_input)
|
171 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
172 |
# Bot Response
|
173 |
st.markdown("#### AI's Answer")
|
174 |
st.write(response['response'])
|
175 |
|
176 |
+
# Sentiment Analysis
|
177 |
+
st.markdown("#### Sentiment Analysis")
|
178 |
+
sentiment = response['sentiment']
|
179 |
+
st.metric(
|
180 |
+
label="Sentiment",
|
181 |
+
value=sentiment['label'],
|
182 |
+
delta=f"{sentiment['score']:.2%}"
|
183 |
+
)
|
184 |
+
|
185 |
# Named Entities
|
186 |
st.markdown("#### Detected Entities")
|
187 |
if response['named_entities']:
|