navodit17 committed on
Commit
396f5a0
·
1 Parent(s): 81917a3

agent with search, file read, youtube

Browse files
Files changed (11) hide show
  1. .DS_Store +0 -0
  2. .gitignore +4 -0
  3. __init__.py +0 -0
  4. agent.py +69 -0
  5. app.py +39 -14
  6. prompt.py +72 -0
  7. tool.py +81 -0
  8. tools/__init__.py +0 -0
  9. tools/web_search.py +47 -0
  10. utils/__init__.py +0 -0
  11. utils/fetch_file.py +38 -0
.DS_Store ADDED
Binary file (6.15 kB). View file
 
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ .env
2
+ __pycache__/
3
+ *.py[cod]
4
+ gaia_files
__init__.py ADDED
File without changes
agent.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ from smolagents import CodeAgent
3
+ from smolagents import OpenAIServerModel
4
+ from tool import fetch_webpage, read_file_tool, get_youtube_transcript
5
+
6
+ from smolagents import VisitWebpageTool, WikipediaSearchTool, PythonInterpreterTool, DuckDuckGoSearchTool, WebSearchTool, SpeechToTextTool
7
+
8
+ from prompt import gaia_prompt
9
+
10
+ load_dotenv()
11
+
12
# Model handle passed to the agent below. Only the model id is configured
# here; NOTE(review): credentials presumably come from the environment
# populated by load_dotenv() above -- confirm.
openai_nano_model = OpenAIServerModel(
    model_id="gpt-4.1-nano-2025-04-14",
    # model_id="o3-mini-2025-01-31",
)

# Single module-level CodeAgent instance; GAIA_Agent below stores a reference
# to this same object rather than building its own.
gaia_agent = CodeAgent(
    model=openai_nano_model,
    # Tools currently registered: webpage fetcher, DuckDuckGo search, Python
    # interpreter, local file reader and YouTube transcript fetcher.
    tools=[fetch_webpage, DuckDuckGoSearchTool(), PythonInterpreterTool(), read_file_tool, get_youtube_transcript],  # WikipediaSearchTool(), VisitWebpageTool(max_output_length=60000)
    # Step budget handed to CodeAgent for each run.
    max_steps=5,
    verbosity_level=2,
    # Extra modules listed as importable for the agent (see the smolagents
    # documentation for the exact semantics of this parameter).
    additional_authorized_imports=["requests", "bs4", "pandas", "numpy", "markdownify"]
)
24
+
25
class GAIA_Agent:
    """Callable wrapper that answers one GAIA question with the shared agent.

    The GAIA system prompt is prepended to every question before the combined
    text is handed to the underlying agent's ``run`` method.
    """

    def __init__(self):
        # Both values are module-level objects; instances only hold references.
        self.system_prompt = gaia_prompt
        self.agent = gaia_agent

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return its answer as text.

        Args:
            question: The question text, optionally with extra context (for
                example a note about a downloaded file) already appended.

        Returns:
            The agent's final answer converted to ``str``. If anything raises
            while building the context or running the agent, the error
            message is printed and returned instead of propagating.
        """
        try:
            full_context = self.system_prompt + "\nTHE QUESTION:\n" + question
            final_answer = self.agent.run(full_context)
        except Exception as e:
            # Deliberate best-effort: one failing question must not abort the
            # whole batch, so the error text is surfaced as the answer.
            error = f"An error occurred while processing the question: {e}"
            print(error)
            return error

        # The agent may hand back a non-string object (number, list, ...);
        # normalise it so the declared ``-> str`` contract actually holds.
        return final_answer if isinstance(final_answer, str) else str(final_answer)
42
+
43
+ # build context + append instructions and all
44
+
45
+ # clean answer function
46
+
47
if __name__ == "__main__":
    # Running this module directly does nothing; the commented-out snippets
    # that follow are manual experiments kept for reference.
    pass
49
+ # gaia_agent.run("What is the weather in Mumbai?")
50
+ # answer = gaia_agent.run(
51
+ # f"""
52
+ # You are a general AI assistant. I will ask you a question. You can answer with the following template:[YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. Remember: GAIA requires exact answer matching. Just provide the factual answer.
53
+
54
+ # How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.
55
+ # """
56
+ # )
57
+
58
+ # print(gaia_prompt)
59
+ # answer = gaia_agent.run("""
60
+ # You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
61
+ # You can search for results and then visit a webpage to get more information. Break down the problem into smaller sub-problems and solve them one by one.
62
+ # Think like a human.
63
+
64
+ # What is the final numeric output from the attached Python code?
65
+
66
+ # ----
67
+
68
+ # """)
69
+ # print(f"this is the final answer the gaia agent gave ---> {answer}")
app.py CHANGED
@@ -3,6 +3,8 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
 
 
6
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
@@ -37,10 +39,12 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
37
  api_url = DEFAULT_API_URL
38
  questions_url = f"{api_url}/questions"
39
  submit_url = f"{api_url}/submit"
 
40
 
41
  # 1. Instantiate Agent ( modify this part to create your agent)
42
  try:
43
- agent = BasicAgent()
 
44
  except Exception as e:
45
  print(f"Error instantiating agent: {e}")
46
  return f"Error initializing agent: {e}", None
@@ -73,19 +77,39 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
73
  results_log = []
74
  answers_payload = []
75
  print(f"Running agent on {len(questions_data)} questions...")
76
- for item in questions_data:
77
- task_id = item.get("task_id")
78
- question_text = item.get("question")
79
- if not task_id or question_text is None:
80
- print(f"Skipping item with missing task_id or question: {item}")
81
- continue
82
- try:
83
- submitted_answer = agent(question_text)
84
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
86
- except Exception as e:
87
- print(f"Error running agent on task {task_id}: {e}")
88
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
  if not answers_payload:
91
  print("Agent did not produce any answers to submit.")
@@ -95,6 +119,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
95
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
96
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
97
  print(status_update)
 
98
 
99
  # 5. Submit
100
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
 
3
  import requests
4
  import inspect
5
  import pandas as pd
6
+ from utils.fetch_file import download_file_if_any
7
+ from agent import GAIA_Agent
8
 
9
  # (Keep Constants as is)
10
  # --- Constants ---
 
39
  api_url = DEFAULT_API_URL
40
  questions_url = f"{api_url}/questions"
41
  submit_url = f"{api_url}/submit"
42
+ file_path = f"{api_url}/files"
43
 
44
  # 1. Instantiate Agent ( modify this part to create your agent)
45
  try:
46
+ # agent = BasicAgent()
47
+ agent = GAIA_Agent()
48
  except Exception as e:
49
  print(f"Error instantiating agent: {e}")
50
  return f"Error initializing agent: {e}", None
 
77
  results_log = []
78
  answers_payload = []
79
  print(f"Running agent on {len(questions_data)} questions...")
80
+ to_answer = [1, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
81
+ for index, item in enumerate(questions_data):
82
+
83
+ if index + 1 in to_answer:
84
+ task_id = item.get("task_id")
85
+ question_text = item.get("question")
86
+ file_name = item.get('file_name')
87
+ file_path = None
88
+
89
+ if file_name:
90
+ try:
91
+ file_path = download_file_if_any(task_id)
92
+ except Exception as e:
93
+ file_path = None
94
+
95
+ if not task_id or question_text is None:
96
+ print(f"Skipping item with missing task_id or question: {item}")
97
+ continue
98
+ try:
99
+ agent_question = question_text
100
+ if file_path:
101
+ agent_question += f"\n\nA file was downloaded for this task and saved locally at:\n {file_path}\n"
102
+
103
+ submitted_answer = agent(agent_question)
104
+
105
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
106
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
107
+ except Exception as e:
108
+ print(f"Error running agent on task {task_id}: {e}")
109
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
110
+
111
+ else:
112
+ print("Do not answer")
113
 
114
  if not answers_payload:
115
  print("Agent did not produce any answers to submit.")
 
119
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
120
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
121
  print(status_update)
122
+ print(submission_data)
123
 
124
  # 5. Submit
125
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
prompt.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
# Short reminder about the answer format. It is not referenced by any other
# code visible in this change.
final_answer = """When answering, provide ONLY the precise answer requested.
Do not include explanations, steps, reasoning, or additional text.
Be direct and specific. GAIA benchmark requires exact matching answers.
"""
7
+
8
# Template for a prompt that checks the format of a final answer. This is a
# plain (non-f) string, so `{agent_memory.get_succinct_steps()}` and
# `{final_answer}` remain literal text here. NOTE(review): presumably a caller
# is meant to fill them in -- none is visible in this change.
# The same string is bound to both `final_answer1` and `prompt`.
final_answer1 = prompt = """
Here is a user-given task and the agent steps: {agent_memory.get_succinct_steps()}. Now here is the FINAL ANSWER that was given:
{final_answer}
Ensure the FINAL ANSWER is in the right format as asked for by the task. Here are the instructions that you need to evaluate:
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
If you are asked for a number, don't use commas to write your number. Don't use units such as $ or percent sign unless specified otherwise. Write your number in Arabic numbers (such as 9 or 3 or 1093) unless specified otherwise.
If you are asked for a currency in your answer, use the symbol for that currency. For example, if you are asked for the answers in USD, an example answer would be $40.00
If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
If you are asked for a comma separated list, ensure you only return the content of that list, and NOT the brackets '[]'
First list reasons why it is/is not in the correct format and then write your final decision: PASS in caps lock if it is satisfactory, FAIL if it is not.
"""
20
+
21
+
22
# System prompt for a coordinating ("top") agent in a multi-agent setup.
# NOTE(review): it refers to `web_agent` and `audiovideo_agent`, neither of
# which is defined in the code visible in this change.
sys_prompt = """You are the top agent of a multi-agent system that can answer questions by coordinating the work of other agents.
You will receive a question and you will decide which agent to use to answer it.
You can use the web_agent to search the web for information and for fetching the content of a web page, or the audiovideo_agent to extract information from video or audio files.
You can also use your own knowledge to answer the question.
You need to respect the output format that is given to you.
Finding the correct answer to the question needs reasoning and planning; read the question carefully, think step by step and do not skip any steps.
"""
29
+
30
# Alternative system prompt that forces a "FINAL ANSWER: [ANSWER]" reply and
# tells the model to reuse a supplied answer when the question is identical.
# It is not referenced by any other code visible in this change.
sys_prompt1 = """
You are a helpful assistant tasked with answering questions using a set of tools.

Your final answer must strictly follow this format:
FINAL ANSWER: [ANSWER]

Only write the answer in that exact format. Do not explain anything. Do not include any other text.

If you are provided with a similar question and its final answer, and the current question is **exactly the same**, then simply return the same final answer without using any tools.

Only use tools if the current question is different from the similar one.

Examples:
- FINAL ANSWER: FunkMonk
- FINAL ANSWER: Paris
- FINAL ANSWER: 128

If you do not follow this format exactly, your response will be considered incorrect."""
48
+
49
+
50
# Prompt that agent.py prepends to every GAIA question.
# NOTE(review): the text mentions audio transcription and image analysis, but
# the agent built in agent.py registers no tool for either -- confirm whether
# the wording or the tool list should change.
gaia_prompt = """
You are a helpful general AI assistant. You are tasked with answering questions from GAIA benchmark.
You can answer questions using a set of tools or you can also use your own knowledge to answer them.

Finding the correct answer to the question needs reasoning and planning; read the question carefully, think step by step and do not skip any steps.
You have access to various tools, including, but not limited to, web search, visiting webpage, executing python code, reading files, transcribing audio, analysing images.
Break down the problem into smaller sub-problems and solve them one by one.

If the question has an associated file, you can use it to answer the question.

Your final answer must strictly follow this format:
[FINAL ANSWER]

YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
Reminder! When giving final answer, provide ONLY the precise answer requested. Do not include explanations, steps, reasoning, or additional text. Be direct and specific.
GAIA benchmark requires exact matching answers. If you do not follow this format exactly, your response will be considered incorrect.

BEFORE GIVING THE FINAL ANSWER DOUBLE CHECK THE EXACT FORMAT IN WHICH THE ANSWER IS NEEDED.

"""
tool.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import Tool, tool
2
+ from youtube_transcript_api import YouTubeTranscriptApi
3
+
4
@tool
def fetch_webpage(url: str, convert_to_markdown: bool = True) -> str:
    """
    Visit a website / url and fetch the content of the webpage.
    If markdown conversion is enabled, script and style elements are removed and the text content is returned as markdown; otherwise the raw unfiltered HTML is returned.

    Args:
        url (str): The URL to fetch.
        convert_to_markdown (bool): If True, convert the HTML content to Markdown format. else return the raw HTML.

    Returns:
        str: The page content as Markdown, or the raw HTML when convert_to_markdown is False.
    """
    import requests
    from bs4 import BeautifulSoup
    from markdownify import markdownify as md

    # Browser-like User-Agent; some sites reject the default requests one.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    response = requests.get(url, timeout=30, headers=headers)

    if not convert_to_markdown:
        return response.text

    soup = BeautifulSoup(response.text, "html.parser")

    # Drop script and style elements together with their contents.
    for tag in soup(["script", "style"]):
        tag.decompose()

    # For Wikipedia keep only the main article container when it exists.
    # Every other case (including a Wikipedia page without that container)
    # converts the cleaned soup, never the raw response, so the removed
    # script/style text cannot come back into the output.
    root = soup
    if "wikipedia.org" in url:
        main_content = soup.find("main", {"id": "content"})
        if main_content is not None:
            root = main_content

    return md(str(root), strip=['script', 'style'], heading_style="ATX").strip()
46
+
47
+
48
@tool
def read_file_tool(file_path: str) -> str:
    """
    Tool to read a text file and return its content.

    Args:
        file_path (str): Path to the file to read.

    Returns:
        str: Content of the file or error message.
    """
    try:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's default encoding.
        with open(file_path, "r", encoding="utf-8") as file:
            return file.read()
    except Exception as e:
        # Best-effort tool boundary: report the problem (missing file, binary
        # content that is not valid UTF-8, ...) as text instead of raising.
        return f"Error reading file: {str(e)}"
64
+
65
+
66
@tool
def get_youtube_transcript(video_id: str) -> str:
    """
    Fetches the transcript of a YouTube video given its video ID.

    Args:
        video_id (str): The ID of the YouTube video. Prefer the bare video ID; for a video with the URL https://www.youtube.com/watch?v=12345 the ID is 12345. A full video URL is also tolerated and the ID is extracted from it.

    Returns:
        str: The transcript of the YouTube video. as a single string with each line separated by a newline character.
    """
    from urllib.parse import parse_qs, urlparse

    # Callers sometimes pass a whole URL despite the instructions. A bare ID
    # never contains "/", so anything with one is treated as a URL.
    candidate = video_id.strip()
    if "/" in candidate:
        parsed = urlparse(candidate if "://" in candidate else f"https://{candidate}")
        from_query = parse_qs(parsed.query).get("v")
        if from_query:
            # https://www.youtube.com/watch?v=<id>
            candidate = from_query[0]
        else:
            # https://youtu.be/<id>, /embed/<id>, /shorts/<id>, /live/<id>
            segments = [segment for segment in parsed.path.split("/") if segment]
            if segments:
                candidate = segments[-1]

    ytt_api = YouTubeTranscriptApi()
    fetched_transcript = ytt_api.fetch(candidate)
    # Raw data is a list of dicts such as
    # {'text': 'Hey there', 'start': 0.0, 'duration': 1.54}; only the text
    # of each entry is kept, one entry per line.
    raw_data = fetched_transcript.to_raw_data()
    return "\n".join(item['text'] for item in raw_data)
tools/__init__.py ADDED
File without changes
tools/web_search.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import tool
2
+
3
@tool
def fetch_webpage(url: str, convert_to_markdown: bool = True) -> str:
    """
    Visits a website and fetches the content of a given URL / webpage.
    If markdown conversion is enabled, script and style elements are removed and the text content is returned as markdown; otherwise the raw unfiltered HTML is returned.

    Args:
        url (str): The URL to fetch.
        convert_to_markdown (bool): If True, convert the HTML content to Markdown format. else return the raw HTML.

    Returns:
        str: The page content as Markdown, or the raw HTML when convert_to_markdown is False.
    """
    import requests
    from bs4 import BeautifulSoup
    from markdownify import markdownify as md

    # Browser-like User-Agent; some sites reject the default requests one.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    response = requests.get(url, timeout=30, headers=headers)

    if not convert_to_markdown:
        return response.text

    soup = BeautifulSoup(response.text, "html.parser")

    # Drop script and style elements together with their contents.
    for tag in soup(["script", "style"]):
        tag.decompose()

    # For Wikipedia keep only the main article container when it exists.
    # Every other case (including a Wikipedia page without that container)
    # converts the cleaned soup, never the raw response, so the removed
    # script/style text cannot come back into the output.
    root = soup
    if "wikipedia.org" in url:
        main_content = soup.find("main", {"id": "content"})
        if main_content is not None:
            root = main_content

    return md(str(root), strip=['script', 'style'], heading_style="ATX").strip()
utils/__init__.py ADDED
File without changes
utils/fetch_file.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import os
3
+
4
# Base URL of the scoring service; task files are requested from
# "<base>/files/<task_id>" by download_file_if_any below.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Relative folder in which downloaded task files are stored.
files_folder = "gaia_files"
# Runs at import time, so the folder is created under whatever the current
# working directory is when this module is first imported.
os.makedirs(files_folder, exist_ok=True)
7
+
8
def download_file_if_any(task_id: str) -> str | None:
    """Download the file attached to a task, if the API serves one.

    Requests ``<DEFAULT_API_URL>/files/<task_id>`` and stores the body in
    ``files_folder`` under the file name announced in the
    ``Content-Disposition`` response header.

    Args:
        task_id: Identifier of the task whose attachment should be fetched.

    Returns:
        The path of the saved file, or ``None`` when the request fails, the
        response names no file, or the file cannot be written.
    """
    url = f"{DEFAULT_API_URL}/files/{task_id}"
    try:
        # A timeout keeps one unresponsive download from hanging the run.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error making request: {e}")
        return None

    content_disposition = response.headers.get("content-disposition") or ""
    if "filename=" not in content_disposition:
        return None

    # Keep only the value of filename=, without trailing ";"-separated
    # parameters or surrounding quotes.
    raw_name = content_disposition.split("filename=", 1)[1].split(";", 1)[0]
    raw_name = raw_name.strip().strip('"')
    # The header is server-controlled: strip any directory part so the file
    # can only ever be written inside files_folder.
    filename = os.path.basename(raw_name.replace("\\", "/"))
    if not filename:
        return None

    # Path is relative to the working directory of the running app.
    file_path = os.path.join(files_folder, filename)
    try:
        os.makedirs(files_folder, exist_ok=True)
        with open(file_path, "wb") as file:
            file.write(response.content)
    except OSError as e:
        print(f"Error saving file {file_path}: {e}")
        return None

    return str(file_path)
34
+
35
+
36
+
37
if __name__ == "__main__":
    # Manual smoke test: try to download the attachment of one hard-coded
    # task id and print the resulting path (or None).
    print(download_file_if_any("f918266a-b3e0-4914-865d-4faa564f1aef"))