Spaces:
Sleeping
Sleeping
Complete Draft
Browse files- .gitignore +3 -0
- app.py +85 -32
- search.py +68 -0
- tools.py +192 -0
- utils.py +52 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
.env
|
2 |
+
__pycache__
|
3 |
+
*.pyc
|
app.py
CHANGED
@@ -3,32 +3,63 @@ import gradio as gr
|
|
3 |
import requests
|
4 |
import inspect
|
5 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
# (Keep Constants as is)
|
8 |
# --- Constants ---
|
9 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
10 |
|
|
|
11 |
# --- Basic Agent Definition ---
|
12 |
# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
|
|
|
|
|
|
|
|
|
13 |
class BasicAgent:
|
14 |
def __init__(self):
|
|
|
15 |
print("BasicAgent initialized.")
|
16 |
-
def __call__(self, question: str) -> str:
|
17 |
-
print(f"Agent received question (first 50 chars): {question[:50]}...")
|
18 |
-
fixed_answer = "This is a default answer."
|
19 |
-
print(f"Agent returning fixed answer: {fixed_answer}")
|
20 |
-
return fixed_answer
|
21 |
|
22 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
"""
|
24 |
Fetches all questions, runs the BasicAgent on them, submits all answers,
|
25 |
and displays the results.
|
26 |
"""
|
27 |
# --- Determine HF Space Runtime URL and Repo URL ---
|
28 |
-
space_id = os.getenv("SPACE_ID")
|
29 |
|
30 |
if profile:
|
31 |
-
username= f"{profile.username}"
|
32 |
print(f"User logged in: {username}")
|
33 |
else:
|
34 |
print("User not logged in.")
|
@@ -55,16 +86,16 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
55 |
response.raise_for_status()
|
56 |
questions_data = response.json()
|
57 |
if not questions_data:
|
58 |
-
|
59 |
-
|
60 |
print(f"Fetched {len(questions_data)} questions.")
|
61 |
except requests.exceptions.RequestException as e:
|
62 |
print(f"Error fetching questions: {e}")
|
63 |
return f"Error fetching questions: {e}", None
|
64 |
except requests.exceptions.JSONDecodeError as e:
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
except Exception as e:
|
69 |
print(f"An unexpected error occurred fetching questions: {e}")
|
70 |
return f"An unexpected error occurred fetching questions: {e}", None
|
@@ -76,23 +107,42 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
76 |
for item in questions_data:
|
77 |
task_id = item.get("task_id")
|
78 |
question_text = item.get("question")
|
|
|
79 |
if not task_id or question_text is None:
|
80 |
print(f"Skipping item with missing task_id or question: {item}")
|
81 |
continue
|
82 |
try:
|
83 |
-
submitted_answer = agent(question_text)
|
84 |
-
answers_payload.append(
|
85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
except Exception as e:
|
87 |
-
|
88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
|
90 |
if not answers_payload:
|
91 |
print("Agent did not produce any answers to submit.")
|
92 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
93 |
|
94 |
-
# 4. Prepare Submission
|
95 |
-
submission_data = {
|
|
|
|
|
|
|
|
|
96 |
status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
|
97 |
print(status_update)
|
98 |
|
@@ -162,20 +212,19 @@ with gr.Blocks() as demo:
|
|
162 |
|
163 |
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
164 |
|
165 |
-
status_output = gr.Textbox(
|
|
|
|
|
166 |
# Removed max_rows=10 from DataFrame constructor
|
167 |
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
|
168 |
|
169 |
-
run_button.click(
|
170 |
-
fn=run_and_submit_all,
|
171 |
-
outputs=[status_output, results_table]
|
172 |
-
)
|
173 |
|
174 |
if __name__ == "__main__":
|
175 |
-
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
176 |
# Check for SPACE_HOST and SPACE_ID at startup for information
|
177 |
space_host_startup = os.getenv("SPACE_HOST")
|
178 |
-
space_id_startup = os.getenv("SPACE_ID")
|
179 |
|
180 |
if space_host_startup:
|
181 |
print(f"✅ SPACE_HOST found: {space_host_startup}")
|
@@ -183,14 +232,18 @@ if __name__ == "__main__":
|
|
183 |
else:
|
184 |
print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
|
185 |
|
186 |
-
if space_id_startup:
|
187 |
print(f"✅ SPACE_ID found: {space_id_startup}")
|
188 |
print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
|
189 |
-
print(
|
|
|
|
|
190 |
else:
|
191 |
-
print(
|
|
|
|
|
192 |
|
193 |
-
print("-"*(60 + len(" App Starting ")) + "\n")
|
194 |
|
195 |
print("Launching Gradio Interface for Basic Agent Evaluation...")
|
196 |
-
demo.launch(debug=True, share=False)
|
|
|
3 |
import requests
|
4 |
import inspect
|
5 |
import pandas as pd
|
6 |
+
from llama_index.core.agent.workflow import AgentWorkflow, ToolCallResult, AgentStream
|
7 |
+
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
|
8 |
+
from tools import APIProcessor, parse_youtube_video, transcribe_image_from_link
|
9 |
+
from search import GoogleSearch
|
10 |
+
from dotenv import load_dotenv
|
11 |
|
12 |
# (Keep Constants as is)
|
13 |
# --- Constants ---
|
14 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
15 |
|
16 |
+
|
17 |
# --- Basic Agent Definition ---
|
18 |
# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
|
19 |
+
|
20 |
+
SYSTEM_PROMPT = "You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
|
21 |
+
|
22 |
+
|
23 |
class BasicAgent:
    """Tool-using agent that answers one evaluation question per call.

    A fresh ``AgentWorkflow`` is assembled for every question because the
    attachment tool is bound to that question's ``task_id`` / ``file_name``.
    """

    def __init__(self):
        self.llm = HuggingFaceInferenceAPI(model_name="Qwen/Qwen2.5-Coder-32B-Instruct")
        print("BasicAgent initialized.")

    def __call__(self, question: str, task_id: str, file_name: str) -> str:
        """Run the agent on ``question`` and return its final response text.

        Args:
            question: The question text handed to the agent.
            task_id: Identifier used to build the attachment download URL.
            file_name: Name of the attached file; falsy when there is none.

        Returns:
            The agent's response converted to ``str``.
        """
        import asyncio

        # One client is enough; both search tools are bound methods of it.
        search = GoogleSearch()
        tools = [
            search.google_search,
            search.google_image_search,
            parse_youtube_video,
            transcribe_image_from_link,
        ]

        if file_name:
            attachment = APIProcessor(
                file_url=DEFAULT_API_URL + "/files/" + task_id, file_name=file_name
            )
            # Register the bound method itself (no call): the agent decides
            # when to download the file, and questions without an attachment
            # never trigger a request at all.
            tools.insert(2, attachment.get_and_process_attachment)

        agent = AgentWorkflow.from_tools_or_functions(
            tools,
            llm=self.llm,
            system_prompt=SYSTEM_PROMPT,
        )

        async def _run():
            # The workflow must be started inside a running event loop and
            # awaited to obtain the final output.
            return await agent.run(user_msg=question)

        # NOTE(review): assumes the caller is not already inside a running
        # event loop (asyncio.run raises RuntimeError in that case) - confirm.
        response = asyncio.run(_run())
        return str(response)
|
51 |
+
|
52 |
+
|
53 |
+
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
54 |
"""
|
55 |
Fetches all questions, runs the BasicAgent on them, submits all answers,
|
56 |
and displays the results.
|
57 |
"""
|
58 |
# --- Determine HF Space Runtime URL and Repo URL ---
|
59 |
+
space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
|
60 |
|
61 |
if profile:
|
62 |
+
username = f"{profile.username}"
|
63 |
print(f"User logged in: {username}")
|
64 |
else:
|
65 |
print("User not logged in.")
|
|
|
86 |
response.raise_for_status()
|
87 |
questions_data = response.json()
|
88 |
if not questions_data:
|
89 |
+
print("Fetched questions list is empty.")
|
90 |
+
return "Fetched questions list is empty or invalid format.", None
|
91 |
print(f"Fetched {len(questions_data)} questions.")
|
92 |
except requests.exceptions.RequestException as e:
|
93 |
print(f"Error fetching questions: {e}")
|
94 |
return f"Error fetching questions: {e}", None
|
95 |
except requests.exceptions.JSONDecodeError as e:
|
96 |
+
print(f"Error decoding JSON response from questions endpoint: {e}")
|
97 |
+
print(f"Response text: {response.text[:500]}")
|
98 |
+
return f"Error decoding server response for questions: {e}", None
|
99 |
except Exception as e:
|
100 |
print(f"An unexpected error occurred fetching questions: {e}")
|
101 |
return f"An unexpected error occurred fetching questions: {e}", None
|
|
|
107 |
for item in questions_data:
|
108 |
task_id = item.get("task_id")
|
109 |
question_text = item.get("question")
|
110 |
+
file_name = item.get("file_name")
|
111 |
if not task_id or question_text is None:
|
112 |
print(f"Skipping item with missing task_id or question: {item}")
|
113 |
continue
|
114 |
try:
|
115 |
+
submitted_answer = agent(question_text, task_id, file_name)
|
116 |
+
answers_payload.append(
|
117 |
+
{"task_id": task_id, "submitted_answer": submitted_answer}
|
118 |
+
)
|
119 |
+
results_log.append(
|
120 |
+
{
|
121 |
+
"Task ID": task_id,
|
122 |
+
"Question": question_text,
|
123 |
+
"Submitted Answer": submitted_answer,
|
124 |
+
}
|
125 |
+
)
|
126 |
except Exception as e:
|
127 |
+
print(f"Error running agent on task {task_id}: {e}")
|
128 |
+
results_log.append(
|
129 |
+
{
|
130 |
+
"Task ID": task_id,
|
131 |
+
"Question": question_text,
|
132 |
+
"Submitted Answer": f"AGENT ERROR: {e}",
|
133 |
+
}
|
134 |
+
)
|
135 |
|
136 |
if not answers_payload:
|
137 |
print("Agent did not produce any answers to submit.")
|
138 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
139 |
|
140 |
+
# 4. Prepare Submission
|
141 |
+
submission_data = {
|
142 |
+
"username": username.strip(),
|
143 |
+
"agent_code": agent_code,
|
144 |
+
"answers": answers_payload,
|
145 |
+
}
|
146 |
status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
|
147 |
print(status_update)
|
148 |
|
|
|
212 |
|
213 |
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
214 |
|
215 |
+
status_output = gr.Textbox(
|
216 |
+
label="Run Status / Submission Result", lines=5, interactive=False
|
217 |
+
)
|
218 |
# Removed max_rows=10 from DataFrame constructor
|
219 |
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
|
220 |
|
221 |
+
run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
|
|
|
|
|
|
|
222 |
|
223 |
if __name__ == "__main__":
|
224 |
+
print("\n" + "-" * 30 + " App Starting " + "-" * 30)
|
225 |
# Check for SPACE_HOST and SPACE_ID at startup for information
|
226 |
space_host_startup = os.getenv("SPACE_HOST")
|
227 |
+
space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
|
228 |
|
229 |
if space_host_startup:
|
230 |
print(f"✅ SPACE_HOST found: {space_host_startup}")
|
|
|
232 |
else:
|
233 |
print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
|
234 |
|
235 |
+
if space_id_startup: # Print repo URLs if SPACE_ID is found
|
236 |
print(f"✅ SPACE_ID found: {space_id_startup}")
|
237 |
print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
|
238 |
+
print(
|
239 |
+
f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
|
240 |
+
)
|
241 |
else:
|
242 |
+
print(
|
243 |
+
"ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined."
|
244 |
+
)
|
245 |
|
246 |
+
print("-" * (60 + len(" App Starting ")) + "\n")
|
247 |
|
248 |
print("Launching Gradio Interface for Basic Agent Evaluation...")
|
249 |
+
demo.launch(debug=True, share=False)
|
search.py
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dotenv import load_dotenv
|
2 |
+
import os
|
3 |
+
import aiohttp
|
4 |
+
|
5 |
+
|
6 |
+
class GoogleSearch:
    """Small async client for the Google Custom Search JSON API.

    Credentials are read from the ``GOOGLE_API_KEY`` and ``GOOGLE_CSE_ID``
    environment variables (after loading a ``.env`` file, if present).
    """

    _ENDPOINT = "https://www.googleapis.com/customsearch/v1"
    # The API accepts between 1 and 10 results per request.
    _MAX_PAGE_SIZE = 10

    def __init__(self):
        load_dotenv()
        # Use getenv for both values so a missing credential surfaces as the
        # descriptive ValueError raised at search time, not a bare KeyError.
        self.api_key = os.getenv("GOOGLE_API_KEY")
        self.cse_id = os.getenv("GOOGLE_CSE_ID")

    @staticmethod
    def _format_results(data: dict, num_results: int, include_snippet: bool) -> str:
        """Render an API response dict as the plain-text block given to the agent."""
        items = (data.get("items") or [])[: max(num_results, 0)]
        if not items:
            # The API omits "items" entirely when nothing matched.
            return "Web Search results:\n\nNo results found."

        entries = []
        for item in items:
            entry = f"Link:{item.get('link', '')}\nTitle:{item.get('title', '')}"
            if include_snippet:
                entry += f"\nSnippet:{item.get('snippet', '')}"
            entries.append(entry)
        return "Web Search results:\n\n" + "\n\n".join(entries)

    async def _search(self, query: str, num_results: int, *, image: bool) -> str:
        """Issue one Custom Search request and return the formatted results."""
        if not self.api_key or not self.cse_id:
            raise ValueError(
                "GOOGLE_API_KEY and GOOGLE_CSE_ID must be set in environment variables."
            )

        page_size = min(max(num_results, 1), self._MAX_PAGE_SIZE)
        params = {
            "key": self.api_key,
            "cx": self.cse_id,
            "q": query,
            "num": str(page_size),
        }
        if image:
            params["searchType"] = "image"

        async with aiohttp.ClientSession() as session:
            async with session.get(self._ENDPOINT, params=params) as response:
                response.raise_for_status()
                data = await response.json()

        return self._format_results(data, num_results, include_snippet=not image)

    async def google_search(self, query: str, num_results: int = 5) -> str:
        """Search the web with Google and return links, titles and snippets.

        Args:
            query: Search query
            num_results: Max results to return
        Returns:
            str: Text block with one Link/Title/Snippet entry per result.
        Raises:
            ValueError: If the API key or search engine id is not configured.
        """
        return await self._search(query, num_results, image=False)

    async def google_image_search(self, query: str, num_results: int = 5) -> str:
        """Search Google Images and return result links and titles.

        Args:
            query: Search query
            num_results: Max results to return
        Returns:
            str: Text block with one Link/Title entry per result.
        Raises:
            ValueError: If the API key or search engine id is not configured.
        """
        return await self._search(query, num_results, image=True)
|
tools.py
ADDED
@@ -0,0 +1,192 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
from dotenv import load_dotenv
|
3 |
+
from openai import OpenAI
|
4 |
+
from utils import process_image_for_gpt
|
5 |
+
import pandas as pd
|
6 |
+
import tempfile
|
7 |
+
import os
|
8 |
+
import io
|
9 |
+
import yt_dlp
|
10 |
+
|
11 |
+
|
12 |
+
def transcribe_image_from_link(image_link: str) -> str:
    """Transcribe the text visible in an image that is reachable by URL.

    Args:
        image_link (str): URL of the image to transcribe

    Returns:
        The transcription produced by the model, stripped of surrounding
        whitespace, or 'No text found in image' if the model returned no
        content.
    """
    # Load .env first, as the other OpenAI-backed helpers in this module do,
    # so OPENAI_API_KEY is available when running locally.
    load_dotenv()
    client = OpenAI()  # Uses OPENAI_API_KEY environment variable

    prompt = (
        "Please transcribe all text visible in this image.\n"
        "Extract the text exactly as it appears, maintaining formatting when possible.\n"
        "If there's no readable text, respond with 'No text found in image'."
    )

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": image_link,
                            "detail": "high",
                        },
                    },
                ],
            }
        ],
        max_tokens=1000,
        temperature=0,
    )

    # The message content may be None; guard before calling .strip().
    content = response.choices[0].message.content
    if not content:
        return "No text found in image"
    return content.strip()
|
47 |
+
|
48 |
+
|
49 |
+
def parse_youtube_video(youtube_url: str) -> str:
    """Returns the title and text transcript of a youtube video

    The audio track is downloaded with yt-dlp into a temporary directory,
    converted to mp3 and sent to the OpenAI transcription endpoint.

    Args:
        youtube_url: the full url linking to the video to transcribe

    Returns:
        A text block containing the video title followed by the transcript.

    Raises:
        FileNotFoundError: If no mp3 file was produced by the download step.
    """
    load_dotenv()
    client = OpenAI()

    with tempfile.TemporaryDirectory() as temp_dir:
        # Configure yt-dlp to extract audio
        ydl_opts = {
            "format": "bestaudio/best",
            "postprocessors": [
                {
                    "key": "FFmpegExtractAudio",
                    "preferredcodec": "mp3",
                    "preferredquality": "192",
                }
            ],
            # Name the file after the video id rather than the title so that
            # long or unusual titles cannot produce an invalid file name.
            "outtmpl": os.path.join(temp_dir, "%(id)s.%(ext)s"),
        }

        # Download audio
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(youtube_url, download=True)
        title = info["title"]

        # Find the downloaded audio file
        audio_file = next(
            (
                os.path.join(temp_dir, name)
                for name in os.listdir(temp_dir)
                if name.endswith(".mp3")
            ),
            None,
        )
        if audio_file is None:
            raise FileNotFoundError("Audio file not found")

        # Transcribe while the temporary directory still exists
        with open(audio_file, "rb") as audio:
            transcript = client.audio.transcriptions.create(
                model="gpt-4o-transcribe", file=audio
            )

    # Return text, as the signature declares, instead of a dict.
    return f"Title: {title}\n\nTranscript:\n{transcript.text}"
|
95 |
+
|
96 |
+
|
97 |
+
class APIProcessor:
    """Downloads a question's attachment and converts it to text.

    mp3 files are transcribed, xlsx workbooks are dumped sheet by sheet,
    png/jpg images are described by a vision model, and any other file is
    returned as decoded text.
    """

    _NO_ATTACHMENT = "No attachment is associated with this question."

    def __init__(self, file_url: str, file_name: str):
        """Store the download URL and file name and create the OpenAI client.

        Args:
            file_url: URL the attachment is downloaded from.
            file_name: Attachment file name; its extension selects the parser.
        """
        load_dotenv()
        self.file_url = file_url
        self.file_name = file_name
        self.client = OpenAI()

    def _transcribe_mp3(self, response: requests.Response) -> str:
        """Write the downloaded audio to a temp file and transcribe it."""
        temp_file_path = None
        try:
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
                temp_file_path = temp_file.name
                for chunk in response.iter_content(chunk_size=8192):
                    temp_file.write(chunk)

            with open(temp_file_path, "rb") as audio_file:
                transcription = self.client.audio.transcriptions.create(
                    model="gpt-4o-transcribe",
                    file=audio_file,
                )
            return transcription.text
        except Exception as e:
            print(str(e))
            # Report the failure as text (like the spreadsheet parser does)
            # instead of implicitly returning None.
            return f"Error transcribing audio: {e}"
        finally:
            # Remove the temp file even if writing or transcription failed.
            if temp_file_path is not None and os.path.exists(temp_file_path):
                os.unlink(temp_file_path)

    def _transcribe_image(self, response: requests.Response) -> str:
        """Describe the downloaded image as text using a vision model."""
        base64_image = process_image_for_gpt(response.content)
        TRANSCRIPTION_PROMPT = """Please in detail transcribe as much of the output information you can via text. Feel free to use ASCII."""
        image_message = [
            {"type": "text", "text": TRANSCRIPTION_PROMPT},
            {
                "type": "image_url",
                "image_url": {
                    # process_image_for_gpt re-encodes the picture as PNG, so
                    # the data URL must advertise image/png.
                    "url": f"data:image/png;base64,{base64_image}",
                },
            },
        ]
        completion = self.client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": image_message}],
            max_tokens=1000,
        )
        return completion.choices[0].message.content or ""

    def _transcribe_spreadsheet(self, response: requests.Response) -> str:
        """Return every sheet of the downloaded workbook rendered as text."""
        try:
            excel_file = pd.ExcelFile(io.BytesIO(response.content))
            all_sheets_data = {
                sheet: excel_file.parse(sheet_name=sheet).to_string()
                for sheet in excel_file.sheet_names
            }
            return str(all_sheets_data)
        except Exception as e:
            return f"Error processing spreadsheet: {e}"

    def get_and_process_attachment(self) -> str:
        """For current question, download and process the file associated if it exists.

        Returns:
            Parsed text output of the attachment, or a short notice when the
            question has no attachment.

        Raises:
            requests.HTTPError: If the download returns an error status.
        """
        if not self.file_name:
            # No file name means nothing to download.
            return self._NO_ATTACHMENT

        response = requests.get(self.file_url, timeout=15)
        response.raise_for_status()

        # Compare extensions case-insensitively ("FILE.XLSX" == "file.xlsx").
        file_extension = os.path.splitext(self.file_name)[1].lstrip(".").lower()

        if file_extension == "mp3":
            return self._transcribe_mp3(response)
        if file_extension == "xlsx":
            return self._transcribe_spreadsheet(response)
        if file_extension in ("png", "jpg", "jpeg"):
            return self._transcribe_image(response)
        # Fallback: hand back decoded text so the declared ``str`` return type
        # holds (the raw ``content`` would be bytes).
        return response.text
|
177 |
+
|
178 |
+
|
179 |
+
if __name__ == "__main__":
    # Manual smoke test; requires network access and OpenAI credentials.
    #
    # Alternative check - process a file attachment from the scoring API:
    #
    # def get_file_api_url(task_id: str) -> str:
    #     return "https://agents-course-unit4-scoring.hf.space" + "/files/" + task_id
    #
    # audio_task_processor = APIProcessor(
    #     file_name="7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx",
    #     file_url=get_file_api_url("7bd855d8-463d-4ed5-93ca-5fe35145f733"),
    # )
    #
    # response = audio_task_processor.get_and_process_attachment()
    # print(response)
    result = parse_youtube_video("https://www.youtube.com/watch?v=1htKBjuUWec")
    print(result)
|
utils.py
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from PIL import Image
|
2 |
+
from io import BytesIO
|
3 |
+
import base64
|
4 |
+
|
5 |
+
|
6 |
+
def encode_image_in_base64(image: bytes) -> str:
    """Return the base64 encoding of ``image`` as a UTF-8 decoded string."""
    return base64.b64encode(image).decode("utf-8")
|
8 |
+
|
9 |
+
|
10 |
+
def replace_transparent_pixels(image_bytes: bytes):
    """
    Decodes an image from bytes and replaces fully transparent pixels with
    opaque white pixels.

    Only pixels whose alpha value is exactly 0 are changed; every other
    pixel is kept as is.

    Args:
        image_bytes: The encoded image data.

    Returns:
        The modified image encoded as PNG bytes, or None if the image could
        not be processed.
    """
    opaque_white = (255, 255, 255, 255)
    try:
        # Close the source handle promptly; convert() returns an independent image.
        with Image.open(BytesIO(image_bytes)) as source:
            img = source.convert("RGBA")

        img.putdata(
            [opaque_white if pixel[3] == 0 else pixel for pixel in img.getdata()]
        )

        img_byte_arr = BytesIO()
        img.save(img_byte_arr, format="PNG")
        return img_byte_arr.getvalue()

    except Exception as e:
        # Best effort: report the problem and let the caller decide what to do.
        print(f"An error occurred: {e}")
        return None
|
47 |
+
|
48 |
+
|
49 |
+
def process_image_for_gpt(image_bytes: bytes) -> str:
    """Prepare image bytes for a vision model and return them base64 encoded.

    Fully transparent pixels are replaced with white first. If that step
    fails (it returns None), the original bytes are encoded unchanged rather
    than raising a TypeError from the encoder.
    """
    cleaned_bytes = replace_transparent_pixels(image_bytes)
    if cleaned_bytes is None:
        cleaned_bytes = image_bytes
    return encode_image_in_base64(cleaned_bytes)
|