itskavya committed on
Commit
06a4d84
·
1 Parent(s): 231a8e0

update working

Browse files
Files changed (3) hide show
  1. .gitignore +2 -0
  2. app.py +332 -27
  3. requirements.txt +6 -3
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .env
2
+ .DS_Store
app.py CHANGED
@@ -1,59 +1,338 @@
1
  import os
2
  import gradio as gr
3
  import requests
4
- import inspect
5
  import pandas as pd
6
- from typing import TypedDict, Optional, Annotated
7
  from langchain_core.messages import AnyMessage
8
- from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
9
  from langgraph.graph.message import add_messages
10
  from langchain_hyperbrowser import HyperbrowserBrowserUseTool
11
- from langgraph.graph import START, StateGraph, MessagesState
12
  from langgraph.prebuilt import ToolNode, tools_condition
13
  from langchain_core.messages import HumanMessage, SystemMessage
14
- from langchain_community.tools import DuckDuckGoSearchResults
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  browser_tool = HyperbrowserBrowserUseTool()
17
- search_tool = DuckDuckGoSearchResults()
18
- tools = [browser_tool, search_tool]
19
- llm = HuggingFaceEndpoint(repo_id="Qwen/Qwen2.5-Coder-32B-Instruct")
20
- chat = ChatHuggingFace(llm=llm)
21
- llm_with_tools = chat.bind_tools(tools)
22
-
23
- class State(TypedDict):
24
- messages = Annotated[list[AnyMessage], add_messages]
25
-
26
- def assistant(state:MessagesState):
27
- system_message = SystemMessage(content="You are a helpful assistant. Your job is to answer the questions asked of you as accurately as possible. You have access to search and browser tools, which you may use when needed to answer a question.")
28
- print(state["messages"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  response = llm_with_tools.invoke([system_message] + state["messages"])
30
  print(response)
 
31
  return {
32
- "messages": response
 
 
33
  }
34
 
35
- workflow = StateGraph(state_schema=MessagesState)
36
  workflow.add_node("assistant", assistant)
37
  workflow.add_node("tools", ToolNode(tools))
38
  workflow.add_edge(START, "assistant")
39
  workflow.add_conditional_edges("assistant", tools_condition)
40
  workflow.add_edge("tools", "assistant")
41
  app = workflow.compile()
42
-
43
- # (Keep Constants as is)
44
- # --- Constants ---
45
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
46
 
47
  # --- Basic Agent Definition ---
48
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
49
  class BasicAgent:
50
  def __init__(self):
51
  print("BasicAgent initialized.")
52
- def __call__(self, question: str) -> str:
53
  print(f"Agent received question (first 50 chars): {question[:50]}...")
54
  # fixed_answer = "This is a default answer."
55
  messages = [HumanMessage(content=question)]
56
- answer = app.invoke({"messages": messages})
 
57
  answer = answer["messages"][-1].content
58
  # print(f"Agent returning fixed answer: {fixed_answer}")
59
  print(f"Agent returning answer: {answer}")
@@ -116,11 +395,17 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
116
  for item in questions_data:
117
  task_id = item.get("task_id")
118
  question_text = item.get("question")
 
 
 
 
 
119
  if not task_id or question_text is None:
120
  print(f"Skipping item with missing task_id or question: {item}")
121
  continue
122
  try:
123
- submitted_answer = agent(question_text)
 
124
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
125
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
126
  except Exception as e:
@@ -235,6 +520,26 @@ if __name__ == "__main__":
235
  print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
236
 
237
  print("-"*(60 + len(" App Starting ")) + "\n")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
 
 
 
 
239
  print("Launching Gradio Interface for Basic Agent Evaluation...")
240
- demo.launch(debug=True, share=False)
 
 
 
 
1
  import os
2
  import gradio as gr
3
  import requests
4
+ import base64
5
  import pandas as pd
6
+ from typing import TypedDict, Annotated
7
  from langchain_core.messages import AnyMessage
 
8
  from langgraph.graph.message import add_messages
9
  from langchain_hyperbrowser import HyperbrowserBrowserUseTool
10
+ from langgraph.graph import START, StateGraph
11
  from langgraph.prebuilt import ToolNode, tools_condition
12
  from langchain_core.messages import HumanMessage, SystemMessage
13
+ from langchain_community.tools import DuckDuckGoSearchRun
14
+ import whisper
15
+ import yt_dlp
16
+ import pandas as pd
17
+ from langchain.globals import set_debug
18
+ from langchain_community.tools.riza.command import ExecPython
19
+ from langchain_openai import ChatOpenAI
20
+ import cv2
21
+ import os
22
+ import shutil
23
+ import uuid
24
+ from langchain_tavily import TavilySearch
25
+
26
+
27
+ # set_debug(True)
28
+
29
+ # (Keep Constants as is)
30
+ # --- Constants ---
31
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
32
+
33
class AgentState(TypedDict):
    """State passed between the nodes of the agent graph for one task."""

    # Conversation history; annotated with `add_messages` so graph updates
    # are combined with the existing list.
    messages: Annotated[list[AnyMessage], add_messages]
    # ID of the task being answered; interpolated into the system prompt.
    task_id: str
    # True when the question item carries a `file_name`.
    has_file: bool
37
+
38
def get_file(task_id: str):
    """
    Download a file locally for a given task.

    Args:
        task_id: The ID of the task whose attached file should be fetched.

    Returns:
        The name of the local file the content was written to, or an empty
        string if the download or the write failed.
    """
    files_url = f"{DEFAULT_API_URL}/files/{task_id}"
    try:
        response = requests.get(files_url, timeout=20)
        response.raise_for_status()
    except requests.RequestException as e:
        print(str(e))
        return ""

    # The header may be absent; treat that as "no name supplied" instead of
    # failing on None.
    cd = response.headers.get("content-disposition") or ""
    filename = ""
    if "filename=" in cd:
        # Drop any trailing parameters (e.g. `; filename*=...`) and quotes.
        filename = cd.split("filename=")[-1].split(";")[0].strip().strip('"')

    # The name comes from the server: keep only the final path component so
    # it cannot escape the working directory.
    filename = os.path.basename(filename.replace("\\", "/"))
    if not filename:
        # Fall back to the task ID so the downloaded content is still usable.
        filename = os.path.basename(str(task_id))
    if not filename:
        return ""

    try:
        with open(filename, "wb") as file:
            file.write(response.content)
    except OSError as e:
        print(str(e))
        return ""
    return filename
55
+
56
def interpret_image(image_name: str, question: str):
    """
    Interpret an image for analysis.

    Args:
        image_name: Path of the local image file to send to the vision model.
        question: Question (or instruction) about the image.

    Returns:
        The model's answer as a string, or an empty string if reading the
        file or calling the model failed.
    """
    # Local import keeps this function self-contained.
    import mimetypes

    vision_llm = ChatOpenAI(model="gpt-4o", temperature=0)

    try:
        with open(image_name, "rb") as file:
            image_bytes = file.read()

        # Label the payload with the file's real image type (frames written
        # by watch_video are .jpg); fall back to PNG when it cannot be guessed.
        mime_type, _ = mimetypes.guess_type(image_name)
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/png"

        base64_image = base64.b64encode(image_bytes).decode("utf-8")
        messages = [HumanMessage(content=[
            {
                "type": "text",
                "text": f"{question}",
            },
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:{mime_type};base64,{base64_image}",
                },
            },
        ])]
        response = vision_llm.invoke(messages)
        return response.content
    except Exception as e:
        # Best-effort tool: report the problem and return an empty answer so
        # the agent loop can continue.
        print(str(e))
        return ""
89
+
90
def transcribe_audio(file_name: str):
    """
    Transcribes audio file.

    Loads the "small" Whisper model, runs it on `file_name` and returns the
    "text" entry of the transcription result.
    """
    transcription = whisper.load_model("small").transcribe(file_name)
    return transcription["text"]
98
+
99
def download_youtube_video(url: str):
    """
    Download a YouTube video.

    Args:
        url: URL of the video to download.

    Returns:
        Path of the downloaded file. This is normally "<output>.mp4"; when
        no merged .mp4 exists but a file with the bare output name does,
        that path is returned instead.
    """
    output_path = f"output_{uuid.uuid4()}"

    ydl_opts = {
        'format': 'bestvideo+bestaudio/best',
        'outtmpl': output_path,
        'merge_output_format': 'mp4',  # Use mp4 as the final output format
        'quiet': True,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])

    merged_path = output_path + ".mp4"
    # If the 'best' fallback format was used there is nothing to merge, and
    # the file is written under the extension-less template name. Return the
    # file that actually exists instead of a path that does not.
    if not os.path.exists(merged_path) and os.path.exists(output_path):
        return output_path
    return merged_path
116
+
117
def read_excel(file_name: str):
    """
    Read the contents of an Excel file.

    The sheet is loaded with pandas, rendered as text without the index
    column, echoed to stdout and returned.
    """
    table_text = pd.read_excel(file_name).to_string(index=False)
    print(table_text)
    return table_text
125
+
126
def read_file(file_name: str):
    """
    Read the content of a text-based file.

    Args:
        file_name: Path of the file to read.

    Returns:
        The full content of the file as a string.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(file_name, 'r', encoding="utf-8") as file:
        return file.read()
133
 
134
def watch_video(file_name: str):
    """
    Extract frames from a video and interpret them.

    One frame is saved roughly every 5 seconds of video into a freshly
    recreated "extracted_frames" directory, and each saved frame is then
    captioned with interpret_image.

    Args:
        file_name: Path of the video file.

    Returns:
        A list with one caption string per saved frame, in frame order.
    """
    frames_dir = "extracted_frames"

    # Start from an empty directory so frames of an earlier video are not
    # captioned again.
    if os.path.exists(frames_dir):
        shutil.rmtree(frames_dir)

    os.makedirs(frames_dir)

    cap = cv2.VideoCapture(file_name)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        if fps and fps > 0:
            # Never let the interval reach 0: `% 0` below would raise.
            frame_interval = max(1, int(fps * 5))
        else:
            # FPS not reported: assume 30 fps rather than saving every frame.
            frame_interval = 150

        frame_count = 0
        saved_count = 0

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_count % frame_interval == 0:
                filename = os.path.join(frames_dir, f"frame_{saved_count:04d}.jpg")
                cv2.imwrite(filename, frame)
                saved_count += 1

            frame_count += 1
    finally:
        # Release the capture even if reading or writing a frame fails.
        cap.release()

    print(f"Saved {saved_count}")

    captions = []

    for file in sorted(os.listdir(frames_dir)):
        file_path = os.path.join(frames_dir, file)
        caption = interpret_image(file_path, "Return a one line description of the image.")
        print(caption)
        captions.append(caption)

    print(captions)
    return captions
176
+
177
+
178
  browser_tool = HyperbrowserBrowserUseTool()
179
+ # search_tool = DuckDuckGoSearchRun()
180
# Tool instances and the plain functions offered to the model.
search_tool = TavilySearch()
code_executor_tool = ExecPython()
tools = [
    search_tool,
    code_executor_tool,
    interpret_image,
    get_file,
    transcribe_audio,
    download_youtube_video,
    read_file,
    watch_video,
    read_excel,
]

# Chat model with the tools bound, so its responses can request tool calls.
llm = ChatOpenAI(model="gpt-4o", temperature=0)
llm_with_tools = llm.bind_tools(tools)
185
+
186
def assistant(state: AgentState):
    """
    Graph node that asks the tool-enabled model for the next message.

    Builds the system prompt (answer-format rules, tool descriptions, the
    current task ID and whether it has a file), prepends it to the
    conversation in `state["messages"]` and invokes the model.

    Args:
        state: Current agent state.

    Returns:
        A state update holding the model response as the new message, plus
        the unchanged `task_id` and `has_file`.
    """
    task_id = state["task_id"]

    # The signatures below are shown to the model, so they must match the
    # real tool functions exactly; otherwise it may pass arguments the tools
    # do not accept.
    image_tool_description = """
    interpret_image(image_name: str, question: str) -> str:
    Interpret an image for analysis.

    Args:
    image_name: Name of the downloaded image file as string.
    question: Question about the image as string.

    Returns:
    An interpretation of the image as string.
    """

    download_file_tool_description = """
    get_file(task_id: str) -> str:
    Download a file locally for a given task.

    Args:
    task_id: The ID of the current task as string.

    Returns:
    The name of the downloaded file as string.
    """

    audio_tool_description = """
    transcribe_audio(file_name: str) -> str:
    Transcribe an audio file.

    Args:
    file_name: The name of the audio file as string.

    Returns:
    The transcription of the audio as string.
    """

    download_youtube_video_description = """
    download_youtube_video(url: str) -> str:
    Downloads a YouTube video.

    Args:
    url: URL of the YouTube video as string.

    Returns:
    The output path for the file.
    """

    excel_tool_description = """
    read_excel(file_name: str) -> str:
    Read the content of an Excel file.

    Args:
    file_name: The name of the Excel file as string.

    Returns:
    A string representation of the content of the file.
    """

    read_file_tool_description = """
    read_file(file_name: str) -> str:
    Read the content of a text-based file.

    Args:
    file_name: The name of the file as string.

    Returns:
    A string containing the content of the file.
    """

    watch_video_tool_description = """
    watch_video(file_name: str) -> list[str]:
    Extract frames from a video and interpret them.

    Args:
    file_name: The name of the file as string.

    Returns:
    A list of captions for each frame.
    """

    search_tool_description = search_tool.description
    code_executor_tool_description = code_executor_tool.description

    has_file = state["has_file"]

    system_message = SystemMessage(content=f"""
    You are a general AI assistant. I will ask you a question.

    Your response should be a number, OR as few words as possible, OR a comma-separated list of numbers and/or strings.
    You SHOULD NOT provide explanations in your response.
    If you are asked for a number, don't use a comma to write your number, neither use symbols such as $ or % unless specified otherwise.
    If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities).
    If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
    If you are including text from the question in your response, make sure to include the text exactly as it appears in the question (e.g. with adjective).
    Do NOT end your response with a period.
    Do NOT write numbers as text.

    You have access to the following tools, which you can use as needed to answer a question:
    - File downloading tool: {download_file_tool_description}
    - Image interpretation tool: {image_tool_description}
    - YouTube video downloader: {download_youtube_video_description}
    - Audio transcription tool: {audio_tool_description}
    - Text-based file reading tool: {read_file_tool_description}
    - Internet search tool: {search_tool_description}
    - Code execution tool: {code_executor_tool_description}
    - Watch video tool: {watch_video_tool_description}
    - Read Excel file tool: {excel_tool_description}

    You may download a file for a given task ONLY if it has a file by using its associated task ID.
    Always ensure you have downloaded a file before using a relevant tool.
    You MUST use the name of a particular downloaded file in your tool call. Do NOT use a file name mentioned in the question.
    When asked about a YouTube video, you can watch it and/or hear it.
    When writing code, avoid excess formatting and keep it clean.
    Do NOT make up answers, instead use a tool to answer the question.

    The current task ID is {task_id}.
    The current task has a file: {has_file}
    """)

    response = llm_with_tools.invoke([system_message] + state["messages"])
    print(response)
    print("\n\n")
    return {
        "messages": [response],
        "task_id": task_id,
        "has_file": has_file
    }
315
 
316
# Agent graph: START -> assistant, with a conditional edge out of the
# assistant (decided by `tools_condition`) and an edge from the tools node
# back to the assistant.
workflow = StateGraph(AgentState)

# Nodes
workflow.add_node("assistant", assistant)
workflow.add_node("tools", ToolNode(tools))

# Edges
workflow.add_edge(START, "assistant")
workflow.add_conditional_edges("assistant", tools_condition)
workflow.add_edge("tools", "assistant")

app = workflow.compile()
323
+
 
 
 
324
 
325
  # --- Basic Agent Definition ---
326
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
327
  class BasicAgent:
328
  def __init__(self):
329
  print("BasicAgent initialized.")
330
+ def __call__(self, question: str, task_id: str, has_file: bool) -> str:
331
  print(f"Agent received question (first 50 chars): {question[:50]}...")
332
  # fixed_answer = "This is a default answer."
333
  messages = [HumanMessage(content=question)]
334
+ state = {"messages": messages, "task_id": task_id, "has_file": has_file}
335
+ answer = app.invoke(state)
336
  answer = answer["messages"][-1].content
337
  # print(f"Agent returning fixed answer: {fixed_answer}")
338
  print(f"Agent returning answer: {answer}")
 
395
  for item in questions_data:
396
  task_id = item.get("task_id")
397
  question_text = item.get("question")
398
+ has_file=False
399
+
400
+ if item.get("file_name"):
401
+ has_file=True
402
+
403
  if not task_id or question_text is None:
404
  print(f"Skipping item with missing task_id or question: {item}")
405
  continue
406
  try:
407
+ print(task_id)
408
+ submitted_answer = agent(question_text, task_id, has_file)
409
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
410
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
411
  except Exception as e:
 
520
  print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
521
 
522
  print("-"*(60 + len(" App Starting ")) + "\n")
523
+
524
+ # try:
525
+ # random_url = f"{DEFAULT_API_URL}/random-question"
526
+ # response = requests.get(random_url, timeout=20)
527
+ # response.raise_for_status()
528
+ # question = response.json()
529
+ # print(question)
530
+ # agent = BasicAgent()
531
+ # print(question.get("question"))
532
+ # print(question.get("task_id"))
533
+ # has_file=False
534
+ # if question.get("file_name"):
535
+ # has_file=True
536
+ # print(agent(question.get("question"), question.get("task_id"), has_file))
537
 
538
+ # except Exception as e:
539
+ # print(str(e))
540
+
541
  print("Launching Gradio Interface for Basic Agent Evaluation...")
542
+ demo.launch(debug=True, share=False)
543
+
544
+
545
+
requirements.txt CHANGED
@@ -2,7 +2,10 @@ gradio
2
  requests
3
  langchain_core
4
  langgraph
5
- langchain-huggingface
6
- langchain-hyperbrowser
7
  duckduckgo-search
8
- langchain-community
 
 
 
 
 
 
2
  requests
3
  langchain_core
4
  langgraph
 
 
5
  duckduckgo-search
6
+ langchain-community
7
+ openai-whisper
8
+ yt-dlp
9
+ rizaio
10
+ langchain-openai
11
+ langchain-tavily