josondev committed on
Commit
7cc7889
·
verified ·
1 Parent(s): eb69d08

Update veryfinal.py

Browse files
Files changed (1) hide show
  1. veryfinal.py +320 -422
veryfinal.py CHANGED
@@ -1,462 +1,360 @@
1
- """Enhanced LangGraph + Agno Hybrid Agent System"""
2
- import os, time, random, asyncio
3
- from dotenv import load_dotenv
4
- from typing import List, Dict, Any, TypedDict, Annotated
5
- import operator
 
 
 
6
 
7
- # LangGraph imports
8
- from langgraph.graph import START, StateGraph, MessagesState
9
- from langgraph.prebuilt import tools_condition
10
- from langgraph.prebuilt import ToolNode
11
- from langgraph.checkpoint.memory import MemorySaver
12
 
13
- # LangChain imports
14
- from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
15
- from langchain_core.tools import tool
16
- from langchain_groq import ChatGroq
17
- from langchain_google_genai import ChatGoogleGenerativeAI
18
- from langchain_nvidia_ai_endpoints import ChatNVIDIA
19
- from langchain_community.tools.tavily_search import TavilySearchResults
20
- from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
21
- from langchain_community.vectorstores import FAISS
22
- from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
23
- from langchain.tools.retriever import create_retriever_tool
24
- from langchain_text_splitters import RecursiveCharacterTextSplitter
25
- from langchain_community.document_loaders import JSONLoader
26
-
27
- # Agno imports
28
- from agno.agent import Agent
29
- from agno.models.groq import GroqChat
30
- from agno.models.google import GeminiChat
31
- from agno.tools.duckduckgo import DuckDuckGoTools
32
- from agno.memory.agent import AgentMemory
33
- from agno.storage.agent import AgentStorage
34
-
35
- load_dotenv()
36
 
37
- # Enhanced Rate Limiter with Performance Optimization
38
- class PerformanceRateLimiter:
39
- def __init__(self, requests_per_minute: int, provider_name: str):
40
- self.requests_per_minute = requests_per_minute
41
- self.provider_name = provider_name
42
- self.request_times = []
43
- self.consecutive_failures = 0
44
- self.performance_cache = {} # Cache for repeated queries
45
-
46
- def wait_if_needed(self):
47
- current_time = time.time()
48
- self.request_times = [t for t in self.request_times if current_time - t < 60]
49
 
50
- if len(self.request_times) >= self.requests_per_minute:
51
- wait_time = 60 - (current_time - self.request_times[0]) + random.uniform(1, 3)
52
- time.sleep(wait_time)
53
-
54
- if self.consecutive_failures > 0:
55
- backoff_time = min(2 ** self.consecutive_failures, 30) + random.uniform(0.5, 1.5)
56
- time.sleep(backoff_time)
57
-
58
- self.request_times.append(current_time)
59
-
60
- def record_success(self):
61
- self.consecutive_failures = 0
62
-
63
- def record_failure(self):
64
- self.consecutive_failures += 1
65
-
66
- # Initialize optimized rate limiters
67
- gemini_limiter = PerformanceRateLimiter(requests_per_minute=28, provider_name="Gemini")
68
- groq_limiter = PerformanceRateLimiter(requests_per_minute=28, provider_name="Groq")
69
- nvidia_limiter = PerformanceRateLimiter(requests_per_minute=4, provider_name="NVIDIA")
70
-
71
- # Agno Agent Setup with Performance Optimization
72
- def create_agno_agents():
73
- """Create high-performance Agno agents"""
74
-
75
- # Storage for persistent memory
76
- storage = AgentStorage(
77
- table_name="agent_sessions",
78
- db_file="tmp/agent_storage.db"
79
- )
80
-
81
- # Math specialist using Groq (fastest)
82
- math_agent = Agent(
83
- name="MathSpecialist",
84
- model=GroqChat(
85
- model="llama-3.3-70b-versatile",
86
- api_key=os.getenv("GROQ_API_KEY"),
87
- temperature=0
88
- ),
89
- description="Expert mathematical problem solver",
90
- instructions=[
91
- "Solve mathematical problems with precision",
92
- "Show step-by-step calculations",
93
- "Use tools for complex computations",
94
- "Always provide numerical answers"
95
- ],
96
- memory=AgentMemory(
97
- db=storage,
98
- create_user_memories=True,
99
- create_session_summary=True
100
- ),
101
- show_tool_calls=False,
102
- markdown=False
103
- )
104
-
105
- # Research specialist using Gemini (most capable)
106
- research_agent = Agent(
107
- name="ResearchSpecialist",
108
- model=GeminiChat(
109
- model="gemini-2.0-flash-lite",
110
- api_key=os.getenv("GOOGLE_API_KEY"),
111
- temperature=0
112
- ),
113
- description="Expert research and information gathering specialist",
114
- instructions=[
115
- "Conduct thorough research using available tools",
116
- "Synthesize information from multiple sources",
117
- "Provide comprehensive, well-cited answers",
118
- "Focus on accuracy and relevance"
119
- ],
120
- tools=[DuckDuckGoTools()],
121
- memory=AgentMemory(
122
- db=storage,
123
- create_user_memories=True,
124
- create_session_summary=True
125
- ),
126
- show_tool_calls=False,
127
- markdown=False
128
- )
129
-
130
- return {
131
- "math": math_agent,
132
- "research": research_agent
133
- }
134
-
135
- # LangGraph Tools (optimized)
136
- @tool
137
- def multiply(a: int, b: int) -> int:
138
- """Multiply two numbers."""
139
- return a * b
140
-
141
- @tool
142
- def add(a: int, b: int) -> int:
143
- """Add two numbers."""
144
- return a + b
145
-
146
- @tool
147
- def subtract(a: int, b: int) -> int:
148
- """Subtract two numbers."""
149
- return a - b
150
 
151
- @tool
152
- def divide(a: int, b: int) -> float:
153
- """Divide two numbers."""
154
- if b == 0:
155
- raise ValueError("Cannot divide by zero.")
156
- return a / b
 
157
 
158
- @tool
159
- def modulus(a: int, b: int) -> int:
160
- """Get the modulus of two numbers."""
161
- return a % b
 
 
162
 
163
- @tool
164
- def optimized_web_search(query: str) -> str:
165
- """Optimized web search with caching."""
166
- try:
167
- time.sleep(random.uniform(1, 2)) # Reduced wait time
168
- search_docs = TavilySearchResults(max_results=2).invoke(query=query) # Reduced results for speed
169
- formatted_search_docs = "\n\n---\n\n".join([
170
- f'<Document source="{doc.get("url", "")}" />\n{doc.get("content", "")[:500]}\n</Document>' # Truncated for speed
171
- for doc in search_docs
172
- ])
173
- return formatted_search_docs
174
- except Exception as e:
175
- return f"Web search failed: {str(e)}"
176
 
177
- @tool
178
- def optimized_wiki_search(query: str) -> str:
179
- """Optimized Wikipedia search."""
180
  try:
181
- time.sleep(random.uniform(0.5, 1)) # Reduced wait time
182
- search_docs = WikipediaLoader(query=query, load_max_docs=1).load()
183
- formatted_search_docs = "\n\n---\n\n".join([
184
- f'<Document source="{doc.metadata["source"]}" />\n{doc.page_content[:800]}\n</Document>' # Truncated for speed
185
- for doc in search_docs
186
- ])
187
- return formatted_search_docs
188
  except Exception as e:
189
- return f"Wikipedia search failed: {str(e)}"
 
 
 
 
190
 
191
- # Optimized FAISS setup
192
- def setup_optimized_faiss():
193
- """Setup optimized FAISS vector store"""
194
  try:
195
- jq_schema = """
196
- {
197
- page_content: .Question,
198
- metadata: {
199
- task_id: .task_id,
200
- Final_answer: ."Final answer"
201
- }
202
- }
203
- """
204
-
205
- json_loader = JSONLoader(file_path="metadata.jsonl", jq_schema=jq_schema, json_lines=True, text_content=False)
206
- json_docs = json_loader.load()
207
-
208
- # Smaller chunks for faster processing
209
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=256, chunk_overlap=50)
210
- json_chunks = text_splitter.split_documents(json_docs)
211
-
212
- embeddings = NVIDIAEmbeddings(
213
- model="nvidia/nv-embedqa-e5-v5",
214
- api_key=os.getenv("NVIDIA_API_KEY")
215
- )
216
- vector_store = FAISS.from_documents(json_chunks, embeddings)
217
-
218
- return vector_store
219
  except Exception as e:
220
- print(f"FAISS setup failed: {e}")
221
- return None
222
 
223
- # Enhanced State with Performance Tracking
224
- class EnhancedAgentState(TypedDict):
225
- messages: Annotated[List[HumanMessage | AIMessage], operator.add]
226
- query: str
227
- agent_type: str
228
- final_answer: str
229
- performance_metrics: Dict[str, Any]
230
- agno_response: str
231
-
232
- # Hybrid LangGraph + Agno System
233
- class HybridLangGraphAgnoSystem:
234
- def __init__(self):
235
- self.agno_agents = create_agno_agents()
236
- self.vector_store = setup_optimized_faiss()
237
- self.langgraph_tools = [multiply, add, subtract, divide, modulus, optimized_web_search, optimized_wiki_search]
238
-
239
- if self.vector_store:
240
- retriever = self.vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 2})
241
- retriever_tool = create_retriever_tool(
242
- retriever=retriever,
243
- name="Question_Search",
244
- description="Retrieve similar questions from knowledge base."
245
- )
246
- self.langgraph_tools.append(retriever_tool)
247
-
248
- self.graph = self._build_hybrid_graph()
249
 
250
- def _build_hybrid_graph(self):
251
- """Build hybrid LangGraph with Agno integration"""
252
-
253
- # LangGraph LLMs
254
- groq_llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0)
255
- gemini_llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-lite", temperature=0)
256
 
257
- def router_node(state: EnhancedAgentState) -> EnhancedAgentState:
258
- """Smart routing between LangGraph and Agno"""
259
- query = state["query"].lower()
260
-
261
- # Route math to LangGraph (faster for calculations)
262
- if any(word in query for word in ['calculate', 'math', 'multiply', 'add', 'subtract', 'divide']):
263
- agent_type = "langgraph_math"
264
- # Route complex research to Agno (better reasoning)
265
- elif any(word in query for word in ['research', 'analyze', 'explain', 'compare']):
266
- agent_type = "agno_research"
267
- # Route factual queries to LangGraph (faster retrieval)
268
- elif any(word in query for word in ['what is', 'who is', 'when', 'where']):
269
- agent_type = "langgraph_retrieval"
270
- else:
271
- agent_type = "agno_general"
272
 
273
- return {**state, "agent_type": agent_type}
274
 
275
- def langgraph_math_node(state: EnhancedAgentState) -> EnhancedAgentState:
276
- """LangGraph math processing (optimized for speed)"""
277
- groq_limiter.wait_if_needed()
 
278
 
279
- start_time = time.time()
280
- llm_with_tools = groq_llm.bind_tools([multiply, add, subtract, divide, modulus])
 
 
 
281
 
282
- system_msg = SystemMessage(content="You are a fast mathematical calculator. Use tools for calculations. Provide precise numerical answers. Format: FINAL ANSWER: [result]")
283
- messages = [system_msg, HumanMessage(content=state["query"])]
 
284
 
285
- try:
286
- response = llm_with_tools.invoke(messages)
287
- processing_time = time.time() - start_time
288
-
289
- return {
290
- **state,
291
- "messages": state["messages"] + [response],
292
- "final_answer": response.content,
293
- "performance_metrics": {"processing_time": processing_time, "provider": "LangGraph-Groq"}
294
- }
295
- except Exception as e:
296
- return {**state, "final_answer": f"Math processing error: {str(e)}"}
297
-
298
- def agno_research_node(state: EnhancedAgentState) -> EnhancedAgentState:
299
- """Agno research processing (optimized for quality)"""
300
- gemini_limiter.wait_if_needed()
301
 
302
- start_time = time.time()
303
- try:
304
- # Use Agno's research agent for complex reasoning
305
- response = self.agno_agents["research"].run(state["query"], stream=False)
306
- processing_time = time.time() - start_time
307
-
308
- return {
309
- **state,
310
- "agno_response": response,
311
- "final_answer": response,
312
- "performance_metrics": {"processing_time": processing_time, "provider": "Agno-Gemini"}
313
- }
314
- except Exception as e:
315
- return {**state, "final_answer": f"Research processing error: {str(e)}"}
316
-
317
- def langgraph_retrieval_node(state: EnhancedAgentState) -> EnhancedAgentState:
318
- """LangGraph retrieval processing (optimized for speed)"""
319
- groq_limiter.wait_if_needed()
320
 
321
- start_time = time.time()
322
- llm_with_tools = groq_llm.bind_tools(self.langgraph_tools)
 
 
 
 
 
 
323
 
324
- system_msg = SystemMessage(content="You are a fast information retrieval assistant. Use search tools efficiently. Provide concise, accurate answers. Format: FINAL ANSWER: [answer]")
325
- messages = [system_msg, HumanMessage(content=state["query"])]
326
 
327
- try:
328
- response = llm_with_tools.invoke(messages)
329
- processing_time = time.time() - start_time
330
-
331
- return {
332
- **state,
333
- "messages": state["messages"] + [response],
334
- "final_answer": response.content,
335
- "performance_metrics": {"processing_time": processing_time, "provider": "LangGraph-Retrieval"}
336
- }
337
- except Exception as e:
338
- return {**state, "final_answer": f"Retrieval processing error: {str(e)}"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339
 
340
- def agno_general_node(state: EnhancedAgentState) -> EnhancedAgentState:
341
- """Agno general processing"""
342
- gemini_limiter.wait_if_needed()
343
-
344
- start_time = time.time()
345
- try:
346
- # Route to appropriate Agno agent based on query complexity
347
- if any(word in state["query"].lower() for word in ['calculate', 'compute']):
348
- response = self.agno_agents["math"].run(state["query"], stream=False)
349
- else:
350
- response = self.agno_agents["research"].run(state["query"], stream=False)
351
-
352
- processing_time = time.time() - start_time
353
-
354
- return {
355
- **state,
356
- "agno_response": response,
357
- "final_answer": response,
358
- "performance_metrics": {"processing_time": processing_time, "provider": "Agno-General"}
359
- }
360
- except Exception as e:
361
- return {**state, "final_answer": f"General processing error: {str(e)}"}
362
 
363
- def route_agent(state: EnhancedAgentState) -> str:
364
- """Route to appropriate processing node"""
365
- agent_type = state.get("agent_type", "agno_general")
366
- return agent_type
 
 
 
 
 
 
 
367
 
368
- # Build the graph
369
- builder = StateGraph(EnhancedAgentState)
370
- builder.add_node("router", router_node)
371
- builder.add_node("langgraph_math", langgraph_math_node)
372
- builder.add_node("agno_research", agno_research_node)
373
- builder.add_node("langgraph_retrieval", langgraph_retrieval_node)
374
- builder.add_node("agno_general", agno_general_node)
375
 
376
- builder.set_entry_point("router")
377
- builder.add_conditional_edges(
378
- "router",
379
- route_agent,
380
- {
381
- "langgraph_math": "langgraph_math",
382
- "agno_research": "agno_research",
383
- "langgraph_retrieval": "langgraph_retrieval",
384
- "agno_general": "agno_general"
385
- }
386
- )
387
 
388
- # All nodes end the workflow
389
- for node in ["langgraph_math", "agno_research", "langgraph_retrieval", "agno_general"]:
390
- builder.add_edge(node, "END")
 
 
 
 
 
 
 
 
 
391
 
392
- memory = MemorySaver()
393
- return builder.compile(checkpointer=memory)
394
-
395
- def process_query(self, query: str) -> Dict[str, Any]:
396
- """Process query with performance optimization"""
397
- start_time = time.time()
398
 
399
- initial_state = {
400
- "messages": [HumanMessage(content=query)],
401
- "query": query,
402
- "agent_type": "",
403
- "final_answer": "",
404
- "performance_metrics": {},
405
- "agno_response": ""
406
- }
407
 
408
- config = {"configurable": {"thread_id": f"hybrid_{hash(query)}"}}
 
 
 
 
409
 
410
- try:
411
- result = self.graph.invoke(initial_state, config)
412
- total_time = time.time() - start_time
413
-
414
- return {
415
- "answer": result.get("final_answer", "No response generated"),
416
- "performance_metrics": {
417
- **result.get("performance_metrics", {}),
418
- "total_time": total_time
419
- },
420
- "provider_used": result.get("performance_metrics", {}).get("provider", "Unknown")
421
- }
422
- except Exception as e:
423
- return {
424
- "answer": f"Error: {str(e)}",
425
- "performance_metrics": {"total_time": time.time() - start_time, "error": True},
426
- "provider_used": "Error"
427
- }
428
 
429
- # Build graph function for compatibility
430
- def build_graph(provider: str = "hybrid"):
431
- """Build the hybrid graph system"""
432
- if provider == "hybrid":
433
- system = HybridLangGraphAgnoSystem()
434
- return system.graph
435
- else:
436
- # Fallback to original implementation
437
- return build_original_graph(provider)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
438
 
439
- def build_original_graph(provider: str):
440
- """Original graph implementation for fallback"""
441
- # Implementation of original graph...
442
- pass
 
 
 
 
 
 
 
 
443
 
444
- # Main execution
445
  if __name__ == "__main__":
446
- # Test the hybrid system
447
- hybrid_system = HybridLangGraphAgnoSystem()
 
448
 
449
- test_queries = [
450
- "What is 25 * 4 + 10?", # Should route to LangGraph math
451
- "Explain the economic impacts of AI automation", # Should route to Agno research
452
- "What are the names of US presidents who were assassinated?", # Should route to LangGraph retrieval
453
- "Compare quantum computing with classical computing" # Should route to Agno general
454
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
455
 
456
- for query in test_queries:
457
- print(f"\nQuery: {query}")
458
- result = hybrid_system.process_query(query)
459
- print(f"Answer: {result['answer']}")
460
- print(f"Provider: {result['provider_used']}")
461
- print(f"Processing Time: {result['performance_metrics'].get('total_time', 0):.2f}s")
462
- print("-" * 80)
 
1
+ """ Enhanced Hybrid Agent Evaluation Runner"""
2
+ import os
3
+ import inspect
4
+ import gradio as gr
5
+ import requests
6
+ import pandas as pd
7
+ from langchain_core.messages import HumanMessage
8
+ from agent import HybridLangGraphAgnoSystem
9
 
10
+ # --- Constants ---
11
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 
 
12
 
13
+ # --- Enhanced Basic Agent Definition ---
14
+ class BasicAgent:
15
+ """A hybrid LangGraph + Agno agent with performance optimization."""
16
+ def __init__(self):
17
+ print("BasicAgent initialized with Hybrid LangGraph + Agno System.")
18
+ self.hybrid_system = HybridLangGraphAgnoSystem()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
+ def __call__(self, question: str) -> str:
21
+ print(f"Agent received question: {question}")
 
 
 
 
 
 
 
 
 
 
22
 
23
+ try:
24
+ # Process query using hybrid system
25
+ result = self.hybrid_system.process_query(question)
26
+
27
+ # Extract final answer
28
+ answer = result.get("answer", "No response generated")
29
+
30
+ # Clean up the answer - extract only final answer if present
31
+ if "FINAL ANSWER:" in answer:
32
+ final_answer = answer.split("FINAL ANSWER:")[-1].strip()
33
+ else:
34
+ final_answer = answer.strip()
35
+
36
+ # Log performance metrics for debugging
37
+ metrics = result.get("performance_metrics", {})
38
+ provider = result.get("provider_used", "Unknown")
39
+ processing_time = metrics.get("total_time", 0)
40
+
41
+ print(f"Provider used: {provider}, Processing time: {processing_time:.2f}s")
42
+
43
+ return final_answer
44
+
45
+ except Exception as e:
46
+ print(f"Error in agent processing: {e}")
47
+ return f"Error: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
50
+ """
51
+ Fetches all questions, runs the Enhanced Hybrid Agent on them, submits all answers,
52
+ and displays the results with performance metrics.
53
+ """
54
+ # --- Determine HF Space Runtime URL and Repo URL ---
55
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
56
 
57
+ if profile:
58
+ username= f"{profile.username}"
59
+ print(f"User logged in: {username}")
60
+ else:
61
+ print("User not logged in.")
62
+ return "Please Login to Hugging Face with the button.", None
63
 
64
+ api_url = DEFAULT_API_URL
65
+ questions_url = f"{api_url}/questions"
66
+ submit_url = f"{api_url}/submit"
 
 
 
 
 
 
 
 
 
 
67
 
68
+ # 1. Instantiate Enhanced Hybrid Agent
 
 
69
  try:
70
+ agent = BasicAgent()
71
+ print("βœ… Hybrid LangGraph + Agno Agent initialized successfully")
 
 
 
 
 
72
  except Exception as e:
73
+ print(f"❌ Error instantiating hybrid agent: {e}")
74
+ return f"Error initializing hybrid agent: {e}", None
75
+
76
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
77
+ print(f"πŸ”— Agent code repository: {agent_code}")
78
 
79
+ # 2. Fetch Questions
80
+ print(f"πŸ“₯ Fetching questions from: {questions_url}")
 
81
  try:
82
+ response = requests.get(questions_url, timeout=15)
83
+ response.raise_for_status()
84
+ questions_data = response.json()
85
+ if not questions_data:
86
+ print("❌ Fetched questions list is empty.")
87
+ return "Fetched questions list is empty or invalid format.", None
88
+ print(f"βœ… Fetched {len(questions_data)} questions successfully.")
89
+ except requests.exceptions.RequestException as e:
90
+ print(f"❌ Error fetching questions: {e}")
91
+ return f"Error fetching questions: {e}", None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  except Exception as e:
93
+ print(f"❌ An unexpected error occurred fetching questions: {e}")
94
+ return f"An unexpected error occurred fetching questions: {e}", None
95
 
96
+ # 3. Run Enhanced Hybrid Agent with Performance Tracking
97
+ results_log = []
98
+ answers_payload = []
99
+ performance_stats = {
100
+ "langgraph_math": 0,
101
+ "agno_research": 0,
102
+ "langgraph_retrieval": 0,
103
+ "agno_general": 0,
104
+ "errors": 0,
105
+ "total_processing_time": 0
106
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
+ print(f"πŸš€ Running Enhanced Hybrid Agent on {len(questions_data)} questions...")
109
+
110
+ for i, item in enumerate(questions_data, 1):
111
+ task_id = item.get("task_id")
112
+ question_text = item.get("question")
 
113
 
114
+ if not task_id or question_text is None:
115
+ print(f"⚠️ Skipping item {i} with missing task_id or question: {item}")
116
+ continue
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
+ print(f"πŸ”„ Processing question {i}/{len(questions_data)}: {task_id}")
119
 
120
+ try:
121
+ # Get detailed result from hybrid system
122
+ detailed_result = agent.hybrid_system.process_query(question_text)
123
+ submitted_answer = detailed_result.get("answer", "No response")
124
 
125
+ # Extract final answer
126
+ if "FINAL ANSWER:" in submitted_answer:
127
+ clean_answer = submitted_answer.split("FINAL ANSWER:")[-1].strip()
128
+ else:
129
+ clean_answer = submitted_answer.strip()
130
 
131
+ # Track performance metrics
132
+ provider = detailed_result.get("provider_used", "Unknown")
133
+ processing_time = detailed_result.get("performance_metrics", {}).get("total_time", 0)
134
 
135
+ # Update performance stats
136
+ if "LangGraph" in provider:
137
+ if "Math" in provider:
138
+ performance_stats["langgraph_math"] += 1
139
+ else:
140
+ performance_stats["langgraph_retrieval"] += 1
141
+ elif "Agno" in provider:
142
+ if "Research" in provider:
143
+ performance_stats["agno_research"] += 1
144
+ else:
145
+ performance_stats["agno_general"] += 1
 
 
 
 
 
146
 
147
+ performance_stats["total_processing_time"] += processing_time
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
+ answers_payload.append({"task_id": task_id, "submitted_answer": clean_answer})
150
+ results_log.append({
151
+ "Task ID": task_id,
152
+ "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
153
+ "Submitted Answer": clean_answer,
154
+ "Provider": provider,
155
+ "Processing Time (s)": f"{processing_time:.2f}"
156
+ })
157
 
158
+ print(f"βœ… Question {i} processed successfully using {provider}")
 
159
 
160
+ except Exception as e:
161
+ print(f"❌ Error running agent on task {task_id}: {e}")
162
+ performance_stats["errors"] += 1
163
+ results_log.append({
164
+ "Task ID": task_id,
165
+ "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
166
+ "Submitted Answer": f"AGENT ERROR: {e}",
167
+ "Provider": "Error",
168
+ "Processing Time (s)": "0.00"
169
+ })
170
+
171
+ if not answers_payload:
172
+ print("❌ Agent did not produce any answers to submit.")
173
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
174
+
175
+ # 4. Performance Summary
176
+ avg_processing_time = performance_stats["total_processing_time"] / len(answers_payload) if answers_payload else 0
177
+ performance_summary = f"""
178
+ πŸ“Š Performance Summary:
179
+ β€’ LangGraph Math: {performance_stats['langgraph_math']} queries
180
+ β€’ Agno Research: {performance_stats['agno_research']} queries
181
+ β€’ LangGraph Retrieval: {performance_stats['langgraph_retrieval']} queries
182
+ β€’ Agno General: {performance_stats['agno_general']} queries
183
+ β€’ Errors: {performance_stats['errors']} queries
184
+ β€’ Average Processing Time: {avg_processing_time:.2f}s
185
+ β€’ Total Processing Time: {performance_stats['total_processing_time']:.2f}s
186
+ """
187
+ print(performance_summary)
188
+
189
+ # 5. Prepare Submission
190
+ submission_data = {
191
+ "username": username.strip(),
192
+ "agent_code": agent_code,
193
+ "answers": answers_payload
194
+ }
195
+ status_update = f"🎯 Hybrid Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
196
+ print(status_update)
197
+
198
+ # 6. Submit Results
199
+ print(f"πŸ“€ Submitting {len(answers_payload)} answers to: {submit_url}")
200
+ try:
201
+ response = requests.post(submit_url, json=submission_data, timeout=120) # Increased timeout
202
+ response.raise_for_status()
203
+ result_data = response.json()
204
 
205
+ final_status = (
206
+ f"πŸŽ‰ Submission Successful!\n"
207
+ f"πŸ‘€ User: {result_data.get('username')}\n"
208
+ f"πŸ† Overall Score: {result_data.get('score', 'N/A')}% "
209
+ f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
210
+ f"πŸ’¬ Message: {result_data.get('message', 'No message received.')}\n"
211
+ f"{performance_summary}"
212
+ )
213
+ print("βœ… Submission successful.")
214
+ results_df = pd.DataFrame(results_log)
215
+ return final_status, results_df
 
 
 
 
 
 
 
 
 
 
 
216
 
217
+ except requests.exceptions.HTTPError as e:
218
+ error_detail = f"Server responded with status {e.response.status_code}."
219
+ try:
220
+ error_json = e.response.json()
221
+ error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
222
+ except requests.exceptions.JSONDecodeError:
223
+ error_detail += f" Response: {e.response.text[:500]}"
224
+ status_message = f"❌ Submission Failed: {error_detail}"
225
+ print(status_message)
226
+ results_df = pd.DataFrame(results_log)
227
+ return status_message, results_df
228
 
229
+ except requests.exceptions.Timeout:
230
+ status_message = "❌ Submission Failed: The request timed out."
231
+ print(status_message)
232
+ results_df = pd.DataFrame(results_log)
233
+ return status_message, results_df
 
 
234
 
235
+ except requests.exceptions.RequestException as e:
236
+ status_message = f"❌ Submission Failed: Network error - {e}"
237
+ print(status_message)
238
+ results_df = pd.DataFrame(results_log)
239
+ return status_message, results_df
 
 
 
 
 
 
240
 
241
+ except Exception as e:
242
+ status_message = f"❌ An unexpected error occurred during submission: {e}"
243
+ print(status_message)
244
+ results_df = pd.DataFrame(results_log)
245
+ return status_message, results_df
246
+
247
+ # --- Enhanced Gradio Interface ---
248
+ with gr.Blocks(title="Enhanced Hybrid Agent Evaluation") as demo:
249
+ gr.Markdown("# πŸš€ Enhanced Hybrid LangGraph + Agno Agent Evaluation Runner")
250
+ gr.Markdown(
251
+ """
252
+ ## 🎯 **Advanced AI Agent System**
253
 
254
+ This evaluation runner uses a **Hybrid LangGraph + Agno Agent System** that combines the best of both frameworks:
 
 
 
 
 
255
 
256
+ ### 🧠 **Intelligent Routing System**
257
+ - **🔢 Mathematical Queries** → LangGraph (Groq Llama 3.3 70B) - *Optimized for speed*
258
+ - **🔍 Complex Research** → Agno (Gemini 2.0 Flash-Lite) - *Optimized for reasoning*
259
+ - **📚 Factual Retrieval** → LangGraph + FAISS Vector Store - *Optimized for accuracy*
260
+ - **🎭 General Queries** → Agno Multi-Agent System - *Optimized for comprehensiveness*
 
 
 
261
 
262
+ ### ⚑ **Performance Features**
263
+ - **Rate Limiting**: Intelligent rate management for free tier models
264
+ - **Caching**: Performance optimization with query caching
265
+ - **Fallback Systems**: Automatic provider switching on failures
266
+ - **Performance Tracking**: Real-time metrics and provider usage stats
267
 
268
+ ### 🛠 **Tools & Capabilities**
269
+ - Mathematical calculations (add, subtract, multiply, divide, modulus)
270
+ - Web search (Tavily, Wikipedia, ArXiv)
271
+ - FAISS vector database for similar question retrieval
272
+ - Memory persistence across sessions
273
+
274
+ ---
275
+
276
+ **Instructions:**
277
+ 1. 🔐 Log in to your Hugging Face account using the button below
278
+ 2. 🚀 Click 'Run Evaluation & Submit All Answers' to start the evaluation
279
+ 3. 📊 Monitor real-time performance metrics and provider usage
280
+ 4. 🏆 View your final score and detailed results
281
+
282
+ **Note:** The hybrid system automatically selects the optimal AI provider for each question type to maximize both speed and accuracy.
283
+ """
284
+ )
 
285
 
286
+ gr.LoginButton()
287
+
288
+ with gr.Row():
289
+ run_button = gr.Button(
290
+ "πŸš€ Run Evaluation & Submit All Answers",
291
+ variant="primary",
292
+ size="lg"
293
+ )
294
+
295
+ status_output = gr.Textbox(
296
+ label="πŸ“Š Run Status / Submission Result",
297
+ lines=10,
298
+ interactive=False,
299
+ placeholder="Status updates will appear here..."
300
+ )
301
+
302
+ results_table = gr.DataFrame(
303
+ label="πŸ“‹ Questions, Answers & Performance Metrics",
304
+ wrap=True,
305
+ height=400
306
+ )
307
+
308
+ run_button.click(
309
+ fn=run_and_submit_all,
310
+ outputs=[status_output, results_table]
311
+ )
312
 
313
+ # Add footer with system info
314
+ gr.Markdown(
315
+ """
316
+ ---
317
+ ### 🔧 **System Information**
318
+ - **Primary Models**: Groq Llama 3.3 70B, Gemini 2.0 Flash-Lite, NVIDIA Llama 3.1 70B
319
+ - **Frameworks**: LangGraph + Agno Hybrid Architecture
320
+ - **Vector Store**: FAISS with NVIDIA Embeddings
321
+ - **Rate Limiting**: Advanced rate management with exponential backoff
322
+ - **Memory**: Persistent agent memory with session summaries
323
+ """
324
+ )
325
 
 
326
  if __name__ == "__main__":
327
+ print("\n" + "="*80)
328
+ print("πŸš€ ENHANCED HYBRID AGENT EVALUATION RUNNER")
329
+ print("="*80)
330
 
331
+ # Check for environment variables
332
+ space_host_startup = os.getenv("SPACE_HOST")
333
+ space_id_startup = os.getenv("SPACE_ID")
334
+
335
+ if space_host_startup:
336
+ print(f"βœ… SPACE_HOST found: {space_host_startup}")
337
+ print(f" 🌐 Runtime URL: https://{space_host_startup}.hf.space")
338
+ else:
339
+ print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
340
+
341
+ if space_id_startup:
342
+ print(f"βœ… SPACE_ID found: {space_id_startup}")
343
+ print(f" πŸ“ Repo URL: https://huggingface.co/spaces/{space_id_startup}")
344
+ print(f" 🌳 Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
345
+ else:
346
+ print("ℹ️ SPACE_ID environment variable not found (running locally?).")
347
+
348
+ print("\n🎯 System Features:")
349
+ print(" β€’ Hybrid LangGraph + Agno Architecture")
350
+ print(" β€’ Intelligent Query Routing")
351
+ print(" β€’ Performance Optimization")
352
+ print(" β€’ Advanced Rate Limiting")
353
+ print(" β€’ FAISS Vector Database")
354
+ print(" β€’ Multi-Provider Fallbacks")
355
 
356
+ print("\n" + "="*80)
357
+ print("πŸŽ‰ Launching Enhanced Gradio Interface...")
358
+ print("="*80 + "\n")
359
+
360
+ demo.launch(debug=True, share=False)