jarguello76 committed
Commit f6d78aa · verified · 1 Parent(s): b470a55

Update app.py

Files changed (1)
  1. app.py +228 -55
app.py CHANGED
@@ -1,66 +1,239 @@
  import datasets
  from langchain.docstore.document import Document
  from langchain.text_splitter import RecursiveCharacterTextSplitter
  from langchain_community.retrievers import BM25Retriever
  from smolagents import Tool, CodeAgent, InferenceClientModel

- # Define RetrieverTool exactly as in your example
- class RetrieverTool(Tool):
-     name = "retriever"
-     description = (
-         "Uses lexical search to retrieve the parts of transformers documentation most relevant to answer your query."
-     )
-     inputs = {
-         "query": {
-             "type": "string",
-             "description": "The query to perform. Use affirmative form rather than a question.",
-         }
-     }
-     output_type = "string"
-
-     def __init__(self, docs, **kwargs):
-         super().__init__(**kwargs)
-         self.retriever = BM25Retriever.from_documents(docs, k=10)
-
-     def forward(self, query: str) -> str:
-         assert isinstance(query, str), "Query must be a string"
-         docs = self.retriever.invoke(query)
-         return "\nRetrieved documents:\n" + "".join(
-             [f"\n\n===== Document {i} =====\n{doc.page_content}" for i, doc in enumerate(docs)]
          )

- # Replace agent creation inside try block
- try:
-     # Load and prepare docs once here
-     knowledge_base = datasets.load_dataset("m-ric/huggingface_doc", split="train")
-     knowledge_base = knowledge_base.filter(lambda row: row["source"].startswith("huggingface/transformers"))
-
-     source_docs = [
-         Document(page_content=doc["text"], metadata={"source": doc["source"].split("/")[1]})
-         for doc in knowledge_base
-     ]
-
-     text_splitter = RecursiveCharacterTextSplitter(
-         chunk_size=500,
-         chunk_overlap=50,
-         add_start_index=True,
-         strip_whitespace=True,
-         separators=["\n\n", "\n", ".", " ", ""],
      )
-     docs_processed = text_splitter.split_documents(source_docs)
-
-     # Instantiate RetrieverTool with processed docs
-     retriever_tool = RetrieverTool(docs_processed)
-
-     # Instantiate the smolagents CodeAgent with model
-     agent = CodeAgent(
-         tools=[retriever_tool],
-         model=InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
-         max_steps=4,
-         verbosity_level=2,
-         stream_outputs=True,
      )

- except Exception as e:
-     print(f"Error instantiating agent: {e}")
-     return f"Error initializing agent: {e}", None
+ import os
+ import requests
+ import pandas as pd
+ import gradio as gr
+
  import datasets
  from langchain.docstore.document import Document
  from langchain.text_splitter import RecursiveCharacterTextSplitter
  from langchain_community.retrievers import BM25Retriever
+
  from smolagents import Tool, CodeAgent, InferenceClientModel

+
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
+     """
+     Fetches all questions, runs the agent on them, submits all answers,
+     and displays the results.
+     """
+     space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code
+
+     if profile:
+         username = f"{profile.username}"
+         print(f"User logged in: {username}")
+     else:
+         print("User not logged in.")
+         return "Please Login to Hugging Face with the button.", None
+
+     api_url = "https://your-api-url"  # Replace with your actual API URL or set as env var
+     questions_url = f"{api_url}/questions"
+     submit_url = f"{api_url}/submit"
+
+     # --- Instantiate Agent ---
+     try:
+         # Load knowledge base dataset and filter
+         knowledge_base = datasets.load_dataset("m-ric/huggingface_doc", split="train")
+         knowledge_base = knowledge_base.filter(lambda row: row["source"].startswith("huggingface/transformers"))
+
+         # Create source Documents for retriever
+         source_docs = [
+             Document(page_content=doc["text"], metadata={"source": doc["source"].split("/")[1]})
+             for doc in knowledge_base
+         ]
+
+         # Split documents into chunks
+         text_splitter = RecursiveCharacterTextSplitter(
+             chunk_size=500,
+             chunk_overlap=50,
+             add_start_index=True,
+             strip_whitespace=True,
+             separators=["\n\n", "\n", ".", " ", ""],
          )
+         docs_processed = text_splitter.split_documents(source_docs)
+
+         # Define the Retriever tool
+         class RetrieverTool(Tool):
+             name = "retriever"
+             description = (
+                 "Uses lexical search to retrieve the parts of transformers documentation "
+                 "that could be most relevant to answer your query."
+             )
+             inputs = {
+                 "query": {
+                     "type": "string",
+                     "description": (
+                         "The query to perform. This should be lexically close to your target documents. "
+                         "Use the affirmative form rather than a question."
+                     ),
+                 }
+             }
+             output_type = "string"
+
+             def __init__(self, docs, **kwargs):
+                 super().__init__(**kwargs)
+                 self.retriever = BM25Retriever.from_documents(docs, k=10)
+
+             def forward(self, query: str) -> str:
+                 assert isinstance(query, str), "Your search query must be a string"
+                 docs = self.retriever.invoke(query)
+                 return "\nRetrieved documents:\n" + "".join(
+                     [f"\n\n===== Document {i} =====\n" + doc.page_content for i, doc in enumerate(docs)]
+                 )
+
+         retriever_tool = RetrieverTool(docs_processed)
+
+         # Instantiate the smolagents CodeAgent
+         agent = CodeAgent(
+             tools=[retriever_tool],
+             model=InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
+             max_steps=4,
+             verbosity_level=2,
+             stream_outputs=True,
+         )
+
+     except Exception as e:
+         return f"Error initializing agent: {e}", None
+
+     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Code repo URL not available"
+     print(agent_code)
 
+     # --- Fetch Questions ---
+     print(f"Fetching questions from: {questions_url}")
+     try:
+         response = requests.get(questions_url, timeout=15)
+         response.raise_for_status()
+         questions_data = response.json()
+         if not questions_data:
+             print("Fetched questions list is empty.")
+             return "Fetched questions list is empty or invalid format.", None
+         print(f"Fetched {len(questions_data)} questions.")
+     except requests.exceptions.RequestException as e:
+         print(f"Error fetching questions: {e}")
+         return f"Error fetching questions: {e}", None
+     except Exception as e:
+         print(f"An unexpected error occurred fetching questions: {e}")
+         return f"An unexpected error occurred fetching questions: {e}", None
+
+     # --- Run Agent on Questions ---
+     results_log = []
+     answers_payload = []
+     print(f"Running agent on {len(questions_data)} questions...")
+     for item in questions_data:
+         task_id = item.get("task_id")
+         question_text = item.get("question")
+         if not task_id or question_text is None:
+             print(f"Skipping item with missing task_id or question: {item}")
+             continue
+         try:
+             # Run the agent
+             submitted_answer = agent.run(question_text)  # Use .run() for smolagents CodeAgent
+             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
+         except Exception as e:
+             print(f"Error running agent on task {task_id}: {e}")
+             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
+
+     if not answers_payload:
+         print("Agent did not produce any answers to submit.")
+         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+
+     # --- Prepare Submission ---
+     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
+     print(status_update)
+
+     # --- Submit Answers ---
+     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
+     try:
+         response = requests.post(submit_url, json=submission_data, timeout=60)
+         response.raise_for_status()
+         result_data = response.json()
+         final_status = (
+             f"Submission Successful!\n"
+             f"User: {result_data.get('username')}\n"
+             f"Overall Score: {result_data.get('score', 'N/A')}% "
+             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
+             f"Message: {result_data.get('message', 'No message received.')}"
+         )
+         print("Submission successful.")
+         results_df = pd.DataFrame(results_log)
+         return final_status, results_df
+     except requests.exceptions.HTTPError as e:
+         error_detail = f"Server responded with status {e.response.status_code}."
+         try:
+             error_json = e.response.json()
+             error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
+         except requests.exceptions.JSONDecodeError:
+             error_detail += f" Response: {e.response.text[:500]}"
+         status_message = f"Submission Failed: {error_detail}"
+         print(status_message)
+         results_df = pd.DataFrame(results_log)
+         return status_message, results_df
+     except requests.exceptions.Timeout:
+         status_message = "Submission Failed: The request timed out."
+         print(status_message)
+         results_df = pd.DataFrame(results_log)
+         return status_message, results_df
+     except requests.exceptions.RequestException as e:
+         status_message = f"Submission Failed: Network error - {e}"
+         print(status_message)
+         results_df = pd.DataFrame(results_log)
+         return status_message, results_df
+     except Exception as e:
+         status_message = f"An unexpected error occurred during submission: {e}"
+         print(status_message)
+         results_df = pd.DataFrame(results_log)
+         return status_message, results_df
+
+
+ # --- Build Gradio Interface using Blocks ---
+ with gr.Blocks() as demo:
+     gr.Markdown("# Basic Agent Evaluation Runner")
+     gr.Markdown(
+         """
+         **Instructions:**
+         1. Clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc.
+         2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
+         3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
+         ---
+         **Disclaimers:**
+         Once you click the "submit" button, it can take quite some time (this is the time for the agent to go through all the questions).
+         This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance, to avoid the long-running submit step, you could cache the answers and submit them in a separate action, or even answer the questions asynchronously.
+         """
      )
+
+     gr.LoginButton()
+
+     run_button = gr.Button("Run Evaluation & Submit All Answers")
+
+     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
+     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+
+     run_button.click(
+         fn=run_and_submit_all,
+         outputs=[status_output, results_table]
      )

+
+ if __name__ == "__main__":
+     print("\n" + "-" * 30 + " App Starting " + "-" * 30)
+     space_host_startup = os.getenv("SPACE_HOST")
+     space_id_startup = os.getenv("SPACE_ID")
+
+     if space_host_startup:
+         print(f"✅ SPACE_HOST found: {space_host_startup}")
+         print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
+     else:
+         print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
+
+     if space_id_startup:
+         print(f"✅ SPACE_ID found: {space_id_startup}")
+         print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
+         print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
+     else:
+         print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
+
+     print("-" * (60 + len(" App Starting ")) + "\n")
+
+     print("Launching Gradio Interface for Basic Agent Evaluation...")
+     demo.launch(debug=True, share=False)
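
For reference, the BM25 lexical retrieval that RetrieverTool.forward wraps can be exercised on its own. The sketch below is not part of this commit: it assumes the same langchain packages used in app.py and substitutes two toy Document objects for the m-ric/huggingface_doc chunks, but it calls BM25Retriever.from_documents and invoke exactly as the tool does.

    # Minimal sketch (not in the commit): the retrieval step used by RetrieverTool,
    # run against two toy documents instead of the processed documentation chunks.
    from langchain.docstore.document import Document
    from langchain_community.retrievers import BM25Retriever

    docs = [
        Document(page_content="Use AutoModel.from_pretrained to load a transformers checkpoint.",
                 metadata={"source": "toy"}),
        Document(page_content="The Trainer API handles the training loop for you.",
                 metadata={"source": "toy"}),
    ]

    retriever = BM25Retriever.from_documents(docs, k=2)   # same call as RetrieverTool.__init__
    results = retriever.invoke("load a pretrained model")  # same call as RetrieverTool.forward

    for i, doc in enumerate(results):
        print(f"===== Document {i} =====\n{doc.page_content}")

Because BM25 is purely lexical, the query wording matters, which is why the tool's description asks for affirmative, document-like phrasing rather than a question.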
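The disclaimer in the Gradio Markdown suggests caching answers and submitting them in a separate action. A rough, untested sketch of that idea follows; it is not part of the commit, and the function names, the cache file, and the argument wiring are hypothetical, reusing only the payload shape and submit endpoint that run_and_submit_all already builds.

    # Hypothetical sketch (not in the commit): cache answers after the agent run,
    # then submit from the cache in a separate, shorter-running Gradio action.
    import json
    import requests

    CACHE_FILE = "cached_answers.json"  # hypothetical cache location

    def cache_answers(answers_payload: list[dict]) -> str:
        """Persist the agent's answers so submission can happen later."""
        with open(CACHE_FILE, "w") as f:
            json.dump(answers_payload, f)
        return f"Cached {len(answers_payload)} answers to {CACHE_FILE}"

    def submit_cached_answers(username: str, agent_code: str, submit_url: str) -> str:
        """Read cached answers and POST them using the same payload shape as app.py."""
        with open(CACHE_FILE) as f:
            answers_payload = json.load(f)
        submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        return response.json().get("message", "Submitted.")

Splitting the run and the submission this way keeps each button callback short, at the cost of managing a cache file (or other storage) between the two actions.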