Spaces:
Runtime error
Runtime error
update working
Browse files- .gitignore +2 -0
- app.py +332 -27
- requirements.txt +6 -3
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
.env
|
2 |
+
.DS_store
|
app.py
CHANGED
@@ -1,59 +1,338 @@
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
import requests
|
4 |
-
import
|
5 |
import pandas as pd
|
6 |
-
from typing import TypedDict,
|
7 |
from langchain_core.messages import AnyMessage
|
8 |
-
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
|
9 |
from langgraph.graph.message import add_messages
|
10 |
from langchain_hyperbrowser import HyperbrowserBrowserUseTool
|
11 |
-
from langgraph.graph import START, StateGraph
|
12 |
from langgraph.prebuilt import ToolNode, tools_condition
|
13 |
from langchain_core.messages import HumanMessage, SystemMessage
|
14 |
-
from langchain_community.tools import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
browser_tool = HyperbrowserBrowserUseTool()
|
17 |
-
search_tool =
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
response = llm_with_tools.invoke([system_message] + state["messages"])
|
30 |
print(response)
|
|
|
31 |
return {
|
32 |
-
"messages": response
|
|
|
|
|
33 |
}
|
34 |
|
35 |
-
workflow = StateGraph(
|
36 |
workflow.add_node("assistant", assistant)
|
37 |
workflow.add_node("tools", ToolNode(tools))
|
38 |
workflow.add_edge(START, "assistant")
|
39 |
workflow.add_conditional_edges("assistant", tools_condition)
|
40 |
workflow.add_edge("tools", "assistant")
|
41 |
app = workflow.compile()
|
42 |
-
|
43 |
-
# (Keep Constants as is)
|
44 |
-
# --- Constants ---
|
45 |
-
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
46 |
|
47 |
# --- Basic Agent Definition ---
|
48 |
# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
|
49 |
class BasicAgent:
|
50 |
def __init__(self):
|
51 |
print("BasicAgent initialized.")
|
52 |
-
def __call__(self, question: str) -> str:
|
53 |
print(f"Agent received question (first 50 chars): {question[:50]}...")
|
54 |
# fixed_answer = "This is a default answer."
|
55 |
messages = [HumanMessage(content=question)]
|
56 |
-
|
|
|
57 |
answer = answer["messages"][-1].content
|
58 |
# print(f"Agent returning fixed answer: {fixed_answer}")
|
59 |
print(f"Agent returning answer: {answer}")
|
@@ -116,11 +395,17 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
116 |
for item in questions_data:
|
117 |
task_id = item.get("task_id")
|
118 |
question_text = item.get("question")
|
|
|
|
|
|
|
|
|
|
|
119 |
if not task_id or question_text is None:
|
120 |
print(f"Skipping item with missing task_id or question: {item}")
|
121 |
continue
|
122 |
try:
|
123 |
-
|
|
|
124 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
125 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
126 |
except Exception as e:
|
@@ -235,6 +520,26 @@ if __name__ == "__main__":
|
|
235 |
print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
|
236 |
|
237 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
238 |
|
|
|
|
|
|
|
239 |
print("Launching Gradio Interface for Basic Agent Evaluation...")
|
240 |
-
demo.launch(debug=True, share=False)
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
import requests
|
4 |
+
import base64
|
5 |
import pandas as pd
|
6 |
+
from typing import TypedDict, Annotated
|
7 |
from langchain_core.messages import AnyMessage
|
|
|
8 |
from langgraph.graph.message import add_messages
|
9 |
from langchain_hyperbrowser import HyperbrowserBrowserUseTool
|
10 |
+
from langgraph.graph import START, StateGraph
|
11 |
from langgraph.prebuilt import ToolNode, tools_condition
|
12 |
from langchain_core.messages import HumanMessage, SystemMessage
|
13 |
+
from langchain_community.tools import DuckDuckGoSearchRun
|
14 |
+
import whisper
|
15 |
+
import yt_dlp
|
16 |
+
import pandas as pd
|
17 |
+
from langchain.globals import set_debug
|
18 |
+
from langchain_community.tools.riza.command import ExecPython
|
19 |
+
from langchain_openai import ChatOpenAI
|
20 |
+
import cv2
|
21 |
+
import os
|
22 |
+
import shutil
|
23 |
+
import uuid
|
24 |
+
from langchain_tavily import TavilySearch
|
25 |
+
|
26 |
+
|
27 |
+
# set_debug(True)
|
28 |
+
|
29 |
+
# (Keep Constants as is)
|
30 |
+
# --- Constants ---
|
31 |
+
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
32 |
+
|
33 |
+
class AgentState(TypedDict):
    """State passed between the nodes of the agent graph for one task."""

    # Conversation so far; `add_messages` is attached as the merge function
    # for updates to this key.
    messages: Annotated[list[AnyMessage], add_messages]
    # Identifier of the task being answered; interpolated into the system
    # prompt by `assistant`.
    task_id: str
    # Whether the task comes with an attached file; also surfaced in the
    # system prompt.
    has_file: bool
|
37 |
+
|
38 |
+
def get_file(task_id: str):
    """Download a file locally for a given task.

    The file is fetched from ``{DEFAULT_API_URL}/files/{task_id}`` and written
    to the current working directory under the name announced in the
    response's ``Content-Disposition`` header.

    Args:
        task_id: The ID of the task whose attached file should be downloaded.

    Returns:
        The name of the downloaded file, or an empty string when the download
        fails or the server does not announce a usable file name.
    """
    # Local import keeps this block self-contained; the stdlib parser handles
    # quoting and extra header parameters that a plain split() gets wrong.
    from email.message import Message

    files_url = f"{DEFAULT_API_URL}/files/{task_id}"
    try:
        response = requests.get(files_url, timeout=20)
        response.raise_for_status()
    except requests.RequestException as e:
        print(str(e))
        return ""

    header = Message()
    header["content-disposition"] = response.headers.get("content-disposition") or ""
    announced_name = header.get_filename() or ""

    # Never trust a server-supplied path: keep only the final path component
    # so the file cannot be written outside the working directory.
    filename = os.path.basename(str(announced_name).replace("\\", "/")).strip()
    if not filename or filename in (".", ".."):
        print(f"No usable file name in response for task {task_id}")
        return ""

    try:
        with open(filename, "wb") as file:
            file.write(response.content)
    except OSError as e:
        print(str(e))
        return ""
    return filename
|
55 |
+
|
56 |
+
def interpret_image(image_name: str, question: str):
    """Interpret an image for analysis.

    The image is read from disk, base64-encoded into a data URL and sent to a
    GPT-4o chat model together with the question.

    Args:
        image_name: Name of the downloaded image file.
        question: Question about the image.

    Returns:
        The model's answer as a string, or an empty string if reading the
        file or calling the model fails.
    """
    # Local import keeps this block self-contained.
    import mimetypes

    vision_llm = ChatOpenAI(model="gpt-4o", temperature=0)

    try:
        with open(image_name, "rb") as file:
            image_bytes = file.read()

        # Label the payload with the real image type (e.g. the .jpg frames
        # produced by watch_video) instead of always claiming PNG; fall back
        # to PNG when the extension is unknown.
        mime_type, _ = mimetypes.guess_type(image_name)
        if mime_type is None or not mime_type.startswith("image/"):
            mime_type = "image/png"

        base64_image = base64.b64encode(image_bytes).decode("utf-8")
        messages = [
            HumanMessage(
                content=[
                    {"type": "text", "text": f"{question}"},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{base64_image}",
                        },
                    },
                ]
            )
        ]
        response = vision_llm.invoke(messages)
        return response.content
    except Exception as e:
        # Tool boundary: report the problem and return an empty result rather
        # than aborting the agent run.
        print(str(e))
        return ""
|
89 |
+
|
90 |
+
def transcribe_audio(file_name: str):
    """Transcribe an audio file.

    Args:
        file_name: The name of the audio file.

    Returns:
        The transcription of the audio as a string.
    """
    # A fresh "small" Whisper model is loaded on every call.
    return whisper.load_model("small").transcribe(file_name)["text"]
|
98 |
+
|
99 |
+
def download_youtube_video(url: str):
    """Download a YouTube video.

    Args:
        url: URL of the YouTube video.

    Returns:
        The output path for the file: a unique ``output_<uuid>`` stem with
        ``.mp4`` appended.
    """
    target_stem = f"output_{uuid.uuid4()}"
    options = {
        'format': 'bestvideo+bestaudio/best',
        'outtmpl': target_stem,
        'merge_output_format': 'mp4',  # final container after merging streams
        'quiet': True,
    }

    with yt_dlp.YoutubeDL(options) as downloader:
        downloader.download([url])

    # NOTE(review): the ".mp4" suffix presumably relies on yt-dlp merging
    # separate video/audio streams; confirm the path is still right when the
    # single-file "best" fallback is used.
    return f"{target_stem}.mp4"
|
116 |
+
|
117 |
+
def read_excel(file_name: str):
    """Read the contents of an Excel file.

    Args:
        file_name: The name of the Excel file.

    Returns:
        A string representation of the table, rendered without the index
        column. The same text is also printed.
    """
    table_text = pd.read_excel(file_name).to_string(index=False)
    print(table_text)
    return table_text
|
125 |
+
|
126 |
+
def read_file(file_name: str):
    """Read the content of a text-based file.

    The file is decoded as UTF-8 regardless of the platform locale; bytes that
    are not valid UTF-8 are replaced with U+FFFD instead of raising, so a
    partly binary file still yields readable text.

    Args:
        file_name: The name of the file.

    Returns:
        A string containing the content of the file.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(file_name, "r", encoding="utf-8", errors="replace") as file:
        return file.read()
|
133 |
|
134 |
+
def watch_video(file_name: str):
    """Extract frames from a video and interpret them.

    Roughly one frame per five seconds of video is saved as a JPEG into the
    ``extracted_frames`` directory (deleted and recreated on every call), and
    each saved frame is captioned with ``interpret_image``.

    Args:
        file_name: The name of the video file.

    Returns:
        A list of captions, one per saved frame, in frame order.
    """
    frames_dir = "extracted_frames"
    if os.path.exists(frames_dir):
        shutil.rmtree(frames_dir)
    os.makedirs(frames_dir)

    saved_paths = []
    cap = cv2.VideoCapture(file_name)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        if fps > 0:
            # At very low frame rates int() would give 0 and the modulo below
            # would raise ZeroDivisionError, so sample at least every frame.
            frame_interval = max(1, int(fps * 5))
        else:
            # Frame rate not reported (0 or NaN): assume 30 fps rather than
            # crashing or captioning every single frame.
            frame_interval = 150

        frame_count = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_count % frame_interval == 0:
                frame_path = os.path.join(
                    frames_dir, f"frame_{len(saved_paths):04d}.jpg"
                )
                # Only keep frames that were actually written to disk.
                if cv2.imwrite(frame_path, frame):
                    saved_paths.append(frame_path)

            frame_count += 1
    finally:
        # Release the capture handle even if reading or writing fails.
        cap.release()

    print(f"Saved {len(saved_paths)}")

    captions = []
    for frame_path in saved_paths:
        caption = interpret_image(
            frame_path, "Return a one line description of the image."
        )
        print(caption)
        captions.append(caption)

    print(captions)
    return captions
|
176 |
+
|
177 |
+
|
178 |
# NOTE(review): browser_tool is constructed here but is not included in the
# `tools` list below, so the agent cannot call it.
browser_tool = HyperbrowserBrowserUseTool()

# Web search goes through Tavily (DuckDuckGoSearchRun was used previously).
search_tool = TavilySearch()
code_executor_tool = ExecPython()

# Everything the model may call: two prebuilt tools plus the plain functions
# defined in this module.
tools = [
    search_tool,
    code_executor_tool,
    interpret_image,
    get_file,
    transcribe_audio,
    download_youtube_video,
    read_file,
    watch_video,
    read_excel,
]

llm = ChatOpenAI(model="gpt-4o", temperature=0)
llm_with_tools = llm.bind_tools(tools)
|
185 |
+
|
186 |
+
def assistant(state: AgentState):
    """Run one model turn for the current task.

    Builds a system prompt containing the answer-format rules, a description
    of every available tool, the task ID and whether the task has a file, then
    invokes the tool-bound model on that prompt followed by the conversation
    so far.

    Args:
        state: Current graph state; reads ``messages``, ``task_id`` and
            ``has_file``.

    Returns:
        A state update with the model response as the only new message and
        ``task_id`` / ``has_file`` passed through unchanged.
    """
    task_id = state["task_id"]
    has_file = state["has_file"]

    # The signatures below are shown to the model, so they must match the
    # real tool functions (argument names and counts) exactly.
    image_tool_description = """
    interpret_image(image_name: str, question: str) -> str:
    Interpret an image for analysis.

    Args:
    image_name: Name of the downloaded image file as string.
    question: Question about the image as string.

    Returns:
    An interpretation of the image as string.
    """

    download_file_tool_description = """
    get_file(task_id: str) -> str:
    Download a file locally for a given task.

    Args:
    task_id: The ID of the current task as string.

    Returns:
    The name of the downloaded file as string.
    """

    audio_tool_description = """
    transcribe_audio(file_name: str) -> str:
    Transcribe an audio file.

    Args:
    file_name: The name of the audio file as string.

    Returns:
    The transcription of the audio as string.
    """

    download_youtube_video_description = """
    download_youtube_video(url: str) -> str:
    Downloads a YouTube video.

    Args:
    url: URL of the YouTube video as string.

    Returns:
    The output path for the file.
    """

    excel_tool_description = """
    read_excel(file_name: str) -> str:
    Read the content of an Excel file.

    Args:
    file_name: The name of the Excel file as string.

    Returns:
    A string representation of the content of the file.
    """

    read_file_tool_description = """
    read_file(file_name: str) -> str:
    Read the content of a text-based file.

    Args:
    file_name: The name of the file as string.

    Returns:
    A string containing the content of the file.
    """

    watch_video_tool_description = """
    watch_video(file_name: str) -> list[str]:
    Extract frames from a video and interpret them.

    Args:
    file_name: The name of the file as string.

    Returns:
    A list of captions for each frame.
    """

    search_tool_description = search_tool.description
    code_executor_tool_description = code_executor_tool.description

    system_message = SystemMessage(content=f"""
    You are a general AI assistant. I will ask you a question.

    Your response should be a number, OR as few words as possible, OR a comma-separated list of numbers and/or strings.
    You SHOULD NOT provide explanations in your response.
    If you are asked for a number, don't use a comma to write your number, neither use symbols such as $ or % unless specified otherwise.
    If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities).
    If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
    If you are including text from the question in your response, make sure to include the text exactly as it appears in the question (e.g. with adjective).
    Do NOT end your response with a period.
    Do NOT write numbers as text.

    You have access to the following tools, which you can use as needed to answer a question:
    - File downloading tool: {download_file_tool_description}
    - Image interpretation tool: {image_tool_description}
    - YouTube video downloader: {download_youtube_video_description}
    - Audio transcription tool: {audio_tool_description}
    - Text-based file reading tool: {read_file_tool_description}
    - Internet search tool: {search_tool_description}
    - Code execution tool: {code_executor_tool_description}
    - Watch video tool: {watch_video_tool_description}
    - Read Excel file tool: {excel_tool_description}

    You may download a file for a given task ONLY if it has a file by using its associated task ID.
    Always ensure you have downloaded a file before using a relevant tool.
    You MUST use the name of a particular downloaded file in your tool call. Do NOT use a file name mentioned in the question.
    When asked about a YouTube video, you can watch it and/or hear it.
    When writing code, avoid excess formatting and keep it clean.
    Do NOT make up answers, instead use a tool to answer the question.

    The current task ID is {task_id}.
    The current task has a file: {has_file}
    """)

    response = llm_with_tools.invoke([system_message] + state["messages"])
    print(response)
    print("\n\n")
    return {
        "messages": [response],
        "task_id": task_id,
        "has_file": has_file,
    }
|
315 |
|
316 |
+
# Agent graph: the assistant node runs the model; whenever its reply contains
# tool calls, the tools node executes them and control returns to the
# assistant.
workflow = StateGraph(AgentState)

# Nodes.
workflow.add_node("assistant", assistant)
workflow.add_node("tools", ToolNode(tools))

# Edges: enter at the assistant, branch on tools_condition after each model
# turn, and always come back to the assistant after running tools.
workflow.add_edge(START, "assistant")
workflow.add_conditional_edges("assistant", tools_condition)
workflow.add_edge("tools", "assistant")

app = workflow.compile()
|
323 |
+
|
|
|
|
|
|
|
324 |
|
325 |
# --- Basic Agent Definition ---
|
326 |
# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
|
327 |
class BasicAgent:
|
328 |
def __init__(self):
|
329 |
print("BasicAgent initialized.")
|
330 |
+
def __call__(self, question: str, task_id: str, has_file: bool) -> str:
|
331 |
print(f"Agent received question (first 50 chars): {question[:50]}...")
|
332 |
# fixed_answer = "This is a default answer."
|
333 |
messages = [HumanMessage(content=question)]
|
334 |
+
state = {"messages": messages, "task_id": task_id, "has_file": has_file}
|
335 |
+
answer = app.invoke(state)
|
336 |
answer = answer["messages"][-1].content
|
337 |
# print(f"Agent returning fixed answer: {fixed_answer}")
|
338 |
print(f"Agent returning answer: {answer}")
|
|
|
395 |
for item in questions_data:
|
396 |
task_id = item.get("task_id")
|
397 |
question_text = item.get("question")
|
398 |
+
has_file=False
|
399 |
+
|
400 |
+
if item.get("file_name"):
|
401 |
+
has_file=True
|
402 |
+
|
403 |
if not task_id or question_text is None:
|
404 |
print(f"Skipping item with missing task_id or question: {item}")
|
405 |
continue
|
406 |
try:
|
407 |
+
print(task_id)
|
408 |
+
submitted_answer = agent(question_text, task_id, has_file)
|
409 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
410 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
411 |
except Exception as e:
|
|
|
520 |
print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
|
521 |
|
522 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
523 |
+
|
524 |
+
# try:
|
525 |
+
# random_url = f"{DEFAULT_API_URL}/random-question"
|
526 |
+
# response = requests.get(random_url, timeout=20)
|
527 |
+
# response.raise_for_status()
|
528 |
+
# question = response.json()
|
529 |
+
# print(question)
|
530 |
+
# agent = BasicAgent()
|
531 |
+
# print(question.get("question"))
|
532 |
+
# print(question.get("task_id"))
|
533 |
+
# has_file=False
|
534 |
+
# if question.get("file_name"):
|
535 |
+
# has_file=True
|
536 |
+
# print(agent(question.get("question"), question.get("task_id"), has_file))
|
537 |
|
538 |
+
# except Exception as e:
|
539 |
+
# print(str(e))
|
540 |
+
|
541 |
print("Launching Gradio Interface for Basic Agent Evaluation...")
|
542 |
+
demo.launch(debug=True, share=False)
|
543 |
+
|
544 |
+
|
545 |
+
|
requirements.txt
CHANGED
@@ -2,7 +2,10 @@ gradio
|
|
2 |
requests
|
3 |
langchain_core
|
4 |
langgraph
|
5 |
-
langchain-huggingface
|
6 |
-
langchain-hyperbrowser
|
7 |
duckduckgo-search
|
8 |
-
langchain-community
|
|
|
|
|
|
|
|
|
|
|
|
2 |
requests
|
3 |
langchain_core
|
4 |
langgraph
|
|
|
|
|
5 |
duckduckgo-search
|
6 |
+
langchain-community
|
7 |
+
openai-whisper
|
8 |
+
yt-dlp
|
9 |
+
rizaio
|
10 |
+
langchain-openai
|
11 |
+
langchain-tavily
|