benjosaur committed
Commit 59b66b3 · 1 Parent(s): 679df7e

Local Submitting Solution

Files changed (4):
  1. app.py +112 -69
  2. requirements.txt +2 -1
  3. search.py +7 -0
  4. tools.py +39 -22
app.py CHANGED
@@ -1,5 +1,6 @@
 import os
-import gradio as gr
+
+# import gradio as gr
 import requests
 import inspect
 import pandas as pd
@@ -9,8 +10,9 @@ from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
 from tools import (
     APIProcessor,
     parse_youtube_video,
-    transcribe_image_from_link,
+    transcribe_image_from_url,
     transcribe_webpage,
+    add_numbers,
 )
 from utils import format_final_answer
 from search import GoogleSearch
@@ -42,18 +44,25 @@ class BasicAgent:

         agent = AgentWorkflow.from_tools_or_functions(
             [
+                add_numbers,
                 google_search,
                 google_image_search,
-                get_and_process_question_attachment,
                 parse_youtube_video,
-                transcribe_image_from_link,
+                transcribe_image_from_url,
                 transcribe_webpage,
             ],
             llm=self.llm,
             system_prompt=SYSTEM_PROMPT,
         )
+
+        attached_contents = get_and_process_question_attachment()
+
+        user_message = (
+            question + f"\n\nContents of attached file: {file_name}" + attached_contents
+        )
+
         ctx = Context(agent)
-        handler = agent.run(question, ctx=ctx)
+        handler = agent.run(user_message, ctx=ctx)
         async for ev in handler.stream_events():
             if isinstance(ev, ToolCallResult):
                 print("")
@@ -70,7 +79,8 @@ class BasicAgent:
         return final_answer


-async def run_and_submit_all(profile: gr.OAuthProfile | None):
+# async def run_and_submit_all(profile: gr.OAuthProfile | None):
+async def run_and_submit_all():
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
@@ -78,12 +88,13 @@ async def run_and_submit_all(profile: gr.OAuthProfile | None):
     # --- Determine HF Space Runtime URL and Repo URL ---
     space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code

-    if profile:
-        username = f"{profile.username}"
-        print(f"User logged in: {username}")
-    else:
-        print("User not logged in.")
-        return "Please Login to Hugging Face with the button.", None
+    # if profile:
+    #     username = f"{profile.username}"
+    #     print(f"User logged in: {username}")
+    # else:
+    #     print("User not logged in.")
+    #     return "Please Login to Hugging Face with the button.", None
+    username = "benjosaur"

     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
@@ -124,7 +135,7 @@ async def run_and_submit_all(profile: gr.OAuthProfile | None):
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
-    async for item in questions_data:
+    for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         file_name = item.get("file_name")
@@ -133,6 +144,9 @@ async def run_and_submit_all(profile: gr.OAuthProfile | None):
             continue
         try:
             submitted_answer = await agent(question_text, task_id, file_name)
+            print(f"Submitted Answer: {submitted_answer}")
+            print("==" * 50)
+            print("")
             answers_payload.append(
                 {"task_id": task_id, "submitted_answer": submitted_answer}
             )
@@ -164,6 +178,7 @@ async def run_and_submit_all(profile: gr.OAuthProfile | None):
         "answers": answers_payload,
     }
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
+    print(f"ANSWERS PAYLOAD: {answers_payload}")
     print(status_update)

     # 5. Submit
@@ -210,61 +225,89 @@ async def run_and_submit_all(profile: gr.OAuthProfile | None):
     return status_message, results_df


-# --- Build Gradio Interface using Blocks ---
-with gr.Blocks() as demo:
-    gr.Markdown("# Basic Agent Evaluation Runner")
-    gr.Markdown(
-        """
-        **Instructions:**
-
-        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
-        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
-        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
-
-        ---
-        **Disclaimers:**
-        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
-        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
-        """
-    )
-
-    gr.LoginButton()
-
-    run_button = gr.Button("Run Evaluation & Submit All Answers")
-
-    status_output = gr.Textbox(
-        label="Run Status / Submission Result", lines=5, interactive=False
-    )
-    # Removed max_rows=10 from DataFrame constructor
-    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
-
-    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
-
+# # --- Build Gradio Interface using Blocks ---
+# with gr.Blocks() as demo:
+#     gr.Markdown("# Basic Agent Evaluation Runner")
+#     gr.Markdown(
+#         """
+#         **Instructions:**
+
+#         1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
+#         2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
+#         3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
+
+#         ---
+#         **Disclaimers:**
+#         Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
+#         This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
+#         """
+#     )
+
+#     gr.LoginButton()
+
+#     run_button = gr.Button("Run Evaluation & Submit All Answers")
+
+#     status_output = gr.Textbox(
+#         label="Run Status / Submission Result", lines=5, interactive=False
+#     )
+#     # Removed max_rows=10 from DataFrame constructor
+#     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+
+#     run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
+
+# async def main():
+#     agent = BasicAgent()
+#     api_url = DEFAULT_API_URL
+#     questions_url = f"{api_url}/questions"
+#     print(f"Fetching questions from: {questions_url}")
+
+#     response = requests.get(questions_url, timeout=15)
+#     response.raise_for_status()
+#     questions_data = response.json()
+
+#     # 3. Run your Agent
+#     results_log = []
+#     answers_payload = []
+#     print(f"Running agent on {len(questions_data)} questions...")
+#     item = questions_data[0]
+#     task_id = item.get("task_id")
+#     question_text = item.get("question")
+#     file_name = item.get("file_name")
+#     submitted_answer = await agent(question_text, task_id, file_name)
+#     answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+#     results_log.append(
+#         {
+#             "Task ID": task_id,
+#             "Question": question_text,
+#             "Submitted Answer": submitted_answer,
+#         }
+#     )

 if __name__ == "__main__":
-    print("\n" + "-" * 30 + " App Starting " + "-" * 30)
-    # Check for SPACE_HOST and SPACE_ID at startup for information
-    space_host_startup = os.getenv("SPACE_HOST")
-    space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup
-
-    if space_host_startup:
-        print(f"✅ SPACE_HOST found: {space_host_startup}")
-        print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
-    else:
-        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
-
-    if space_id_startup:  # Print repo URLs if SPACE_ID is found
-        print(f"✅ SPACE_ID found: {space_id_startup}")
-        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-        print(
-            f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
-        )
-    else:
-        print(
-            "ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined."
-        )
-
-    print("-" * (60 + len(" App Starting ")) + "\n")
-
-    print("Launching Gradio Interface for Basic Agent Evaluation...")
-    demo.launch(debug=True, share=False)
+    # print("\n" + "-" * 30 + " App Starting " + "-" * 30)
+    # # Check for SPACE_HOST and SPACE_ID at startup for information
+    # space_host_startup = os.getenv("SPACE_HOST")
+    # space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup
+
+    # if space_host_startup:
+    #     print(f"✅ SPACE_HOST found: {space_host_startup}")
+    #     print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
+    # else:
+    #     print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
+
+    # if space_id_startup:  # Print repo URLs if SPACE_ID is found
+    #     print(f"✅ SPACE_ID found: {space_id_startup}")
+    #     print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
+    #     print(
+    #         f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
+    #     )
+    # else:
+    #     print(
+    #         "ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined."
+    #     )
+
+    # print("-" * (60 + len(" App Starting ")) + "\n")
+
+    # print("Launching Gradio Interface for Basic Agent Evaluation...")
+    # demo.launch(debug=True, share=False)
+    asyncio.run(run_and_submit_all())
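Taken together, the app.py changes drop the Gradio/OAuth flow and drive the submission loop straight from the command line with a hardcoded username. Below is a minimal, illustrative sketch of that local-run pattern, not the exact app.py: the scoring URL, the "/questions" and "/submit" routes, the submission payload fields, and the echo_agent stand-in are assumptions to keep the sketch self-contained.

# Minimal local-run sketch (illustrative): fetch the questions, run an agent
# coroutine over them, and POST the answers without any Gradio UI.
# Assumptions: the API exposes /questions and /submit; substitute the real
# DEFAULT_API_URL and payload fields from app.py.
import asyncio
import os

import requests

DEFAULT_API_URL = os.getenv("SCORING_API_URL", "http://localhost:8000")


async def echo_agent(question: str, task_id: str, file_name: str) -> str:
    # Stand-in for BasicAgent(); replace with the real agent callable.
    return "placeholder answer"


async def run_and_submit_all(agent=echo_agent, username: str = "benjosaur"):
    questions = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15).json()

    answers_payload = []
    for item in questions:
        task_id = item.get("task_id")
        answer = await agent(item.get("question"), task_id, item.get("file_name"))
        answers_payload.append({"task_id": task_id, "submitted_answer": answer})

    # app.py's submission dict may carry additional fields beyond these two.
    submission = {"username": username, "answers": answers_payload}
    response = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=60)
    response.raise_for_status()
    return response.json()


if __name__ == "__main__":
    print(asyncio.run(run_and_submit_all()))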
requirements.txt CHANGED
@@ -10,4 +10,5 @@ yt-dlp
 html2text
 llama-index-utils-workflow
 llama-index-llms-huggingface-api
-asyncio
+asyncio
+pydub
search.py CHANGED
@@ -6,6 +6,7 @@ import aiohttp
 class GoogleSearch:
     def __init__(self):
         load_dotenv()
+        self.counter = 0
         self.api_key = os.environ["GOOGLE_API_KEY"]
         self.cse_id = os.getenv("GOOGLE_CSE_ID")

@@ -17,6 +18,9 @@
         Returns:
             dict: JSON response from Google API.
         """
+        if self.counter > 1:
+            return "No more searches, move on"
+        self.counter += 1

         if not self.api_key or not self.cse_id:
             raise ValueError(
@@ -46,6 +50,9 @@
         Returns:
             dict: JSON response from Google API.
         """
+        if self.counter > 2:
+            return "No more searches, move on"
+        self.counter += 1

         if not self.api_key or not self.cse_id:
             raise ValueError(
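The search.py change is a simple per-instance call budget: once a few searches have been spent, the tool returns a sentinel string instead of hitting the Google API again. A standalone sketch of the same guard pattern follows; the class and method names here are illustrative, not the ones in search.py.

class BudgetedSearch:
    """Illustrative call-budget guard mirroring the counter added to GoogleSearch."""

    def __init__(self, max_calls: int = 2):
        self.counter = 0
        self.max_calls = max_calls

    def search(self, query: str) -> str:
        # Once the budget is spent, short-circuit with a sentinel the agent can read.
        if self.counter >= self.max_calls:
            return "No more searches, move on"
        self.counter += 1
        return f"results for {query!r}"  # a real implementation would call the CSE API here


searcher = BudgetedSearch(max_calls=2)
print(searcher.search("first query"))   # results
print(searcher.search("second query"))  # results
print(searcher.search("third query"))   # "No more searches, move on"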
tools.py CHANGED
@@ -11,14 +11,18 @@ import re
 import html2text
 from requests.exceptions import RequestException
 from bs4 import BeautifulSoup
+from pydub import AudioSegment


-def transcribe_image_from_link(image_link: str) -> str:
-    """
+def add_numbers(*nums: list[int]) -> int:
+    """Add a list of numbers
     Args:
-        image_link (str): URL of the image to transcribe
-    """
-    client = OpenAI()  # Uses OPENAI_API_KEY environment variable
+        nums: list of numbers"""
+
+
+def transcribe_image_from_url(image_url: str) -> str:
+    """Only works with full http urls"""
+    client = OpenAI()

     response = client.chat.completions.create(
         model="gpt-4o",
@@ -35,7 +39,7 @@ def transcribe_image_from_link(image_link: str) -> str:
                     {
                         "type": "image_url",
                         "image_url": {
-                            "url": image_link,
+                            "url": image_url,
                             "detail": "high",
                         },
                     },
@@ -68,7 +72,7 @@ def transcribe_webpage(website_url: str) -> str:
     content_div = soup.find("div", id="mw-content-text")

     if not content_div:
-        return "Main content not found."
+        content_div = soup.find("div")

     # Only extract <p> and <table> tags
     elements = content_div.find_all(["p", "table"])
@@ -95,7 +99,7 @@ def transcribe_webpage(website_url: str) -> str:
 def parse_youtube_video(youtube_url: str) -> str:
     """Returns text transcript of a youtube video
     Args:
-        youtube_url: the full url linking to the video to transcribe
+        youtube_url: full url linking to the video to transcribe
     """
     load_dotenv()
     client = OpenAI()
@@ -107,7 +111,7 @@ def parse_youtube_video(youtube_url: str) -> str:
             {
                 "key": "FFmpegExtractAudio",
                 "preferredcodec": "mp3",
-                "preferredquality": "192",
+                "preferredquality": "64",
             }
         ],
         "outtmpl": "%(title)s.%(ext)s",
@@ -119,7 +123,6 @@ def parse_youtube_video(youtube_url: str) -> str:
     # Download audio
     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
         info = ydl.extract_info(youtube_url, download=True)
-        title = info["title"]

     # Find the downloaded audio file
     audio_file = None
@@ -131,13 +134,27 @@ def parse_youtube_video(youtube_url: str) -> str:
     if not audio_file:
         raise Exception("Audio file not found")

-    # Transcribe with Whisper
-    with open(audio_file, "rb") as audio:
-        transcript = client.audio.transcriptions.create(
-            model="gpt-4o-transcribe", file=audio
-        )
+    audio = AudioSegment.from_mp3(audio_file)
+    chunk_length_ms = 5 * 1000 * 60
+    chunks = []
+
+    for i in range(0, len(audio), chunk_length_ms):
+        chunk = audio[i : i + chunk_length_ms]
+        chunk_path = os.path.join(temp_dir, f"chunk_{i // chunk_length_ms}.mp3")
+        chunk.export(chunk_path, format="mp3")
+        chunks.append(chunk_path)
+
+    # Transcribe each chunk
+    full_transcript = ""
+    for chunk_path in chunks:
+        with open(chunk_path, "rb") as audio_chunk:
+            transcript = client.audio.transcriptions.create(
+                model="whisper-1",
+                file=audio_chunk,
+            )
+        full_transcript += transcript.text + " "

-    return {"title": title, "transcript": transcript.text}
+    return full_transcript.strip()


 class APIProcessor:
@@ -236,9 +253,9 @@ if __name__ == "__main__":

     # response = audio_task_processor.get_and_process_attachment()
     # print(response)
-    # result = parse_youtube_video("https://www.youtube.com/watch?v=1htKBjuUWec")
-    # print(result)
-    text = transcribe_webpage(
-        "https://en.wikipedia.org/wiki/Mercedes_Sosa#Studio_albums"
-    )
-    print(text)
+    result = parse_youtube_video("https://www.youtube.com/watch?v=1htKBjuUWec")
+    print(result)
+    # text = transcribe_webpage(
+    #     "https://en.wikipedia.org/wiki/Mercedes_Sosa#Studio_albums"
+    # )
+    # print(text)
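The largest tools.py change is the chunked transcription in parse_youtube_video: the downloaded MP3 is split into 5-minute pieces with pydub and each piece is sent to Whisper, with the texts concatenated at the end. A self-contained sketch of that loop is below, assuming ffmpeg is on PATH (required by pydub for MP3 handling), OPENAI_API_KEY is set, and the audio file already exists; the yt-dlp download step and the function name transcribe_in_chunks are illustrative, not from the repo.

import os
import tempfile

from openai import OpenAI
from pydub import AudioSegment  # needs ffmpeg on PATH for MP3 decoding/encoding


def transcribe_in_chunks(audio_path: str, chunk_minutes: int = 5) -> str:
    """Split an MP3 into fixed-length chunks and transcribe each with Whisper."""
    client = OpenAI()  # reads OPENAI_API_KEY from the environment
    audio = AudioSegment.from_mp3(audio_path)
    chunk_length_ms = chunk_minutes * 60 * 1000

    full_transcript = ""
    with tempfile.TemporaryDirectory() as temp_dir:
        for i in range(0, len(audio), chunk_length_ms):
            # Export the slice to a temp file so it can be uploaded as a file object.
            chunk_path = os.path.join(temp_dir, f"chunk_{i // chunk_length_ms}.mp3")
            audio[i : i + chunk_length_ms].export(chunk_path, format="mp3")
            with open(chunk_path, "rb") as audio_chunk:
                transcript = client.audio.transcriptions.create(
                    model="whisper-1", file=audio_chunk
                )
            full_transcript += transcript.text + " "
    return full_transcript.strip()


if __name__ == "__main__":
    print(transcribe_in_chunks("downloaded_audio.mp3"))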