josondev committed on
Commit
4dcc8e2
·
verified ·
1 Parent(s): 41f9740

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -61
app.py CHANGED
@@ -1,66 +1,58 @@
1
- """ Enhanced LangGraph Agent Evaluation Runner - Final Version"""
2
  import os
3
  import gradio as gr
4
  import requests
5
  import pandas as pd
6
  from langchain_core.messages import HumanMessage
7
- from veryfinal import build_graph
8
 
9
  # --- Constants ---
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
12
  # --- Enhanced Agent Definition ---
13
class EnhancedLangGraphAgent:
    """Enhanced LangGraph agent with proper response handling.

    The graph is built once in ``__init__`` via ``build_graph``. Calling the
    instance with a question string invokes the graph and returns a cleaned
    answer string; failures are reported as strings rather than raised.
    """

    # Answer returned when the graph yields nothing usable.
    FALLBACK_ANSWER = "Information not available"
    # Text after the last occurrence of this marker is taken as the answer.
    ANSWER_MARKER = "FINAL ANSWER:"

    def __init__(self):
        print("Enhanced LangGraph Agent initialized.")
        try:
            self.graph = build_graph(provider="groq")
            print("LangGraph built successfully.")
        except Exception as e:
            # Keep the instance constructible: run_and_submit_all inspects
            # ``agent.graph is None`` to report the initialization failure.
            print(f"Error building graph: {e}")
            self.graph = None

    def __call__(self, question: str) -> str:
        """Run the graph on *question* and return the extracted answer.

        Returns an ``"Error: ..."`` string if the graph was not built or if
        invoking it raises, and ``"Information not available"`` when no
        usable answer can be extracted.
        """
        print(f"Processing: {question[:100]}...")

        if self.graph is None:
            return "Error: Agent not properly initialized"

        try:
            messages = [HumanMessage(content=question)]
            # NOTE(review): hash() of a str is salted per process, so this
            # thread_id differs between runs -- confirm that is acceptable
            # for whatever checkpointer the graph uses.
            config = {"configurable": {"thread_id": f"eval_{hash(question)}"}}
            result = self.graph.invoke({"messages": messages}, config)
            return self._extract_answer(result, question)
        except Exception as e:
            print(f"Error processing question: {e}")
            return f"Error: {str(e)}"

    @classmethod
    def _extract_answer(cls, result, question):
        """Pull the final answer text out of a graph result mapping."""
        if not result or "messages" not in result or not result["messages"]:
            return cls.FALLBACK_ANSWER

        final_message = result["messages"][-1]
        content = getattr(final_message, "content", final_message)
        # Coerce non-string content so the marker split below cannot raise.
        answer = content if isinstance(content, str) else str(content)

        if cls.ANSWER_MARKER in answer:
            answer = answer.split(cls.ANSWER_MARKER)[-1]
        answer = answer.strip()

        # Reject empty answers and answers that merely echo the question,
        # ignoring surrounding whitespace on either side.
        if not answer or answer == question.strip():
            return cls.FALLBACK_ANSWER
        return answer
 
61
 
62
  def run_and_submit_all(profile: gr.OAuthProfile | None):
63
- """Fetch questions, run agent, and submit answers."""
64
  space_id = os.getenv("SPACE_ID")
65
 
66
  if profile:
@@ -76,7 +68,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
76
 
77
  # 1. Instantiate Agent
78
  try:
79
- agent = EnhancedLangGraphAgent()
80
  if agent.graph is None:
81
  return "Error: Failed to initialize agent properly", None
82
  except Exception as e:
@@ -84,6 +76,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
84
  return f"Error initializing agent: {e}", None
85
 
86
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "No space ID available"
 
87
 
88
  # 2. Fetch Questions
89
  print(f"Fetching questions from: {questions_url}")
@@ -92,27 +85,35 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
92
  response.raise_for_status()
93
  questions_data = response.json()
94
  if not questions_data:
 
95
  return "Fetched questions list is empty or invalid format.", None
96
  print(f"Fetched {len(questions_data)} questions.")
97
  except Exception as e:
 
98
  return f"Error fetching questions: {e}", None
99
 
100
- # 3. Run Agent
101
  results_log = []
102
  answers_payload = []
103
- print(f"Running Enhanced LangGraph agent on {len(questions_data)} questions...")
104
 
105
  for i, item in enumerate(questions_data):
106
  task_id = item.get("task_id")
107
  question_text = item.get("question")
108
 
109
  if not task_id or question_text is None:
 
110
  continue
111
 
112
  print(f"Processing question {i+1}/{len(questions_data)}: {task_id}")
113
 
114
  try:
115
  submitted_answer = agent(question_text)
 
 
 
 
 
116
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
117
  results_log.append({
118
  "Task ID": task_id,
@@ -121,6 +122,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
121
  })
122
  except Exception as e:
123
  error_msg = f"AGENT ERROR: {e}"
 
124
  answers_payload.append({"task_id": task_id, "submitted_answer": error_msg})
125
  results_log.append({
126
  "Task ID": task_id,
@@ -129,12 +131,16 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
129
  })
130
 
131
  if not answers_payload:
 
132
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
133
 
134
- # 4. Submit
135
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
136
- print(f"Submitting {len(answers_payload)} answers...")
137
-
 
 
 
138
  try:
139
  response = requests.post(submit_url, json=submission_data, timeout=60)
140
  response.raise_for_status()
@@ -146,35 +152,51 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
146
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
147
  f"Message: {result_data.get('message', 'No message received.')}"
148
  )
149
- return final_status, pd.DataFrame(results_log)
 
 
150
  except Exception as e:
151
- return f"Submission Failed: {e}", pd.DataFrame(results_log)
 
 
 
152
 
153
- # --- Gradio Interface ---
154
  with gr.Blocks() as demo:
155
- gr.Markdown("# Enhanced LangGraph Agent - Final Version")
156
  gr.Markdown(
157
  """
158
- **Features:**
159
- - Proper LangGraph structure with tool integration
160
- - Multi-LLM support (Groq, Google, HuggingFace)
161
- - ✅ Enhanced search capabilities (Wikipedia, Tavily, ArXiv)
162
- - Mathematical tools for calculations
163
- - Vector store integration for similar questions
164
- - Proper response formatting and validation
165
- - Error handling and fallback mechanisms
 
 
 
 
166
 
167
- **Tools Available:**
168
- - Mathematical operations (add, subtract, multiply, divide, modulus)
169
- - Wikipedia search for encyclopedic information
170
- - Web search via Tavily for current information
171
- - ArXiv search for academic papers
172
- - Vector similarity search for related questions
 
 
 
 
 
173
  """
174
  )
175
 
176
  gr.LoginButton()
 
177
  run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
 
178
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
179
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
180
 
@@ -184,5 +206,5 @@ with gr.Blocks() as demo:
184
  )
185
 
186
  if __name__ == "__main__":
187
- print("\n" + "-"*30 + " Enhanced LangGraph Agent Starting " + "-"*30)
188
  demo.launch(debug=True, share=False)
 
1
+ """ Enhanced Multi-LLM Agent Evaluation Runner with Vector Database Integration"""
2
  import os
3
  import gradio as gr
4
  import requests
5
  import pandas as pd
6
  from langchain_core.messages import HumanMessage
7
+ from veryfinal import build_graph, HybridLangGraphMultiLLMSystem
8
 
9
  # --- Constants ---
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
12
  # --- Enhanced Agent Definition ---
13
class EnhancedMultiLLMAgent:
    """A multi-provider LangGraph agent with vector database integration.

    Wraps a ``HybridLangGraphMultiLLMSystem`` built in ``__init__``. Calling
    the instance with a question string delegates to the system's
    ``process_query`` and returns a cleaned answer string; failures are
    reported as strings rather than raised.
    """

    # Answer returned when the system yields nothing usable.
    FALLBACK_ANSWER = "Information not available"
    # Optional question metadata loaded into the system when the file exists.
    METADATA_PATH = "metadata.jsonl"

    def __init__(self):
        print("Enhanced Multi-LLM Agent with Vector Database initialized.")
        try:
            self.system = HybridLangGraphMultiLLMSystem(provider="groq")
            self.graph = self.system.graph
        except Exception as e:
            # Keep the instance constructible: run_and_submit_all inspects
            # ``agent.graph is None`` to report the initialization failure.
            print(f"Error building graph: {e}")
            self.graph = None
            self.system = None
            return

        # Metadata is optional ("if available"), so a failure while loading
        # it is logged but must not disable an otherwise working agent.
        if os.path.exists(self.METADATA_PATH):
            try:
                print("Loading question metadata...")
                count = self.system.load_metadata_from_jsonl(self.METADATA_PATH)
                print(f"Loaded {count} questions into vector database")
            except Exception as e:
                print(f"Error loading metadata: {e}")

        print("Enhanced Multi-LLM Graph built successfully.")

    def __call__(self, question: str) -> str:
        """Answer *question* using the wrapped system.

        Returns an ``"Error: ..."`` string if the agent was not initialized
        or if ``process_query`` raises, and ``"Information not available"``
        when the result is empty or merely repeats the question.
        """
        print(f"Agent received question: {question[:100]}...")

        if self.graph is None or self.system is None:
            return "Error: Agent not properly initialized"

        try:
            raw = self.system.process_query(question)
        except Exception as e:
            error_msg = f"Error: {str(e)}"
            print(error_msg)
            return error_msg

        # Accept either plain text or a message-like object with ``content``.
        content = getattr(raw, "content", raw)
        if content is None:
            return self.FALLBACK_ANSWER

        # Coerce non-string results so stripping cannot raise.
        answer = (content if isinstance(content, str) else str(content)).strip()

        # Reject empty answers and answers that merely echo the question,
        # ignoring surrounding whitespace on either side.
        if not answer or answer == question.strip():
            return self.FALLBACK_ANSWER
        return answer
53
 
54
  def run_and_submit_all(profile: gr.OAuthProfile | None):
55
+ """Fetch questions, run enhanced agent, and submit answers."""
56
  space_id = os.getenv("SPACE_ID")
57
 
58
  if profile:
 
68
 
69
  # 1. Instantiate Agent
70
  try:
71
+ agent = EnhancedMultiLLMAgent()
72
  if agent.graph is None:
73
  return "Error: Failed to initialize agent properly", None
74
  except Exception as e:
 
76
  return f"Error initializing agent: {e}", None
77
 
78
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "No space ID available"
79
+ print(f"Agent code URL: {agent_code}")
80
 
81
  # 2. Fetch Questions
82
  print(f"Fetching questions from: {questions_url}")
 
85
  response.raise_for_status()
86
  questions_data = response.json()
87
  if not questions_data:
88
+ print("Fetched questions list is empty.")
89
  return "Fetched questions list is empty or invalid format.", None
90
  print(f"Fetched {len(questions_data)} questions.")
91
  except Exception as e:
92
+ print(f"Error fetching questions: {e}")
93
  return f"Error fetching questions: {e}", None
94
 
95
+ # 3. Run Enhanced Agent
96
  results_log = []
97
  answers_payload = []
98
+ print(f"Running Enhanced Multi-LLM agent with vector database on {len(questions_data)} questions...")
99
 
100
  for i, item in enumerate(questions_data):
101
  task_id = item.get("task_id")
102
  question_text = item.get("question")
103
 
104
  if not task_id or question_text is None:
105
+ print(f"Skipping item with missing task_id or question: {item}")
106
  continue
107
 
108
  print(f"Processing question {i+1}/{len(questions_data)}: {task_id}")
109
 
110
  try:
111
  submitted_answer = agent(question_text)
112
+
113
+ # Additional validation to prevent question repetition
114
+ if submitted_answer == question_text or submitted_answer.startswith(question_text):
115
+ submitted_answer = "Information not available"
116
+
117
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
118
  results_log.append({
119
  "Task ID": task_id,
 
122
  })
123
  except Exception as e:
124
  error_msg = f"AGENT ERROR: {e}"
125
+ print(f"Error running agent on task {task_id}: {e}")
126
  answers_payload.append({"task_id": task_id, "submitted_answer": error_msg})
127
  results_log.append({
128
  "Task ID": task_id,
 
131
  })
132
 
133
  if not answers_payload:
134
+ print("Agent did not produce any answers to submit.")
135
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
136
 
137
+ # 4. Prepare Submission
138
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
139
+ status_update = f"Enhanced Multi-LLM Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
140
+ print(status_update)
141
+
142
+ # 5. Submit
143
+ print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
144
  try:
145
  response = requests.post(submit_url, json=submission_data, timeout=60)
146
  response.raise_for_status()
 
152
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
153
  f"Message: {result_data.get('message', 'No message received.')}"
154
  )
155
+ print("Submission successful.")
156
+ results_df = pd.DataFrame(results_log)
157
+ return final_status, results_df
158
  except Exception as e:
159
+ status_message = f"Submission Failed: {e}"
160
+ print(status_message)
161
+ results_df = pd.DataFrame(results_log)
162
+ return status_message, results_df
163
 
164
+ # --- Build Gradio Interface ---
165
  with gr.Blocks() as demo:
166
+ gr.Markdown("# Enhanced Multi-LLM Agent with Vector Database Integration")
167
  gr.Markdown(
168
  """
169
+ **Instructions:**
170
+ 1. Log in to your Hugging Face account using the button below.
171
+ 2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
172
+
173
+ **Enhanced Agent Features:**
174
+ - **Multi-LLM Support**: Groq (Llama-3 8B/70B, DeepSeek)
175
+ - **Vector Database Integration**: FAISS + Supabase for similar question retrieval
176
+ - **Intelligent Routing**: Automatically selects best provider based on query complexity
177
+ - **Enhanced Tools**: Mathematical operations, web search, Wikipedia integration
178
+ - **Question-Answering**: Optimized for evaluation tasks with proper formatting
179
+ - **Similar Questions Context**: Uses vector similarity to provide relevant context
180
+ - **Error Handling**: Robust fallback mechanisms and comprehensive logging
181
 
182
+ **Routing Examples:**
183
+ - Math: "What is 25 multiplied by 17?" → Llama-3 70B
184
+ - Search: "Find information about Mercedes Sosa" → Search-Enhanced
185
+ - Complex: "Analyze quantum computing principles" → DeepSeek
186
+ - Simple: "What is the capital of France?" → Llama-3 8B
187
+
188
+ **Vector Database Features:**
189
+ - Automatic loading of metadata.jsonl if present
190
+ - Similar question retrieval for enhanced context
191
+ - Supabase integration for persistent storage
192
+ - FAISS for fast vector similarity search
193
  """
194
  )
195
 
196
  gr.LoginButton()
197
+
198
  run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
199
+
200
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
201
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
202
 
 
206
  )
207
 
208
  if __name__ == "__main__":
209
+ print("\n" + "-"*30 + " Enhanced Multi-LLM Agent with Vector DB Starting " + "-"*30)
210
  demo.launch(debug=True, share=False)