Add final formatting step
app.py
CHANGED
@@ -3,7 +3,8 @@ import gradio as gr
 import requests
 import inspect
 import pandas as pd
-from llama_index.core.agent.workflow import AgentWorkflow
+from llama_index.core.workflow import Context
+from llama_index.core.agent.workflow import AgentWorkflow, ToolCallResult, AgentStream
 from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
 from tools import (
     APIProcessor,
@@ -11,8 +12,9 @@ from tools import (
     transcribe_image_from_link,
     transcribe_webpage,
 )
+from utils import format_final_answer
 from search import GoogleSearch
-
+import asyncio
 
 # (Keep Constants as is)
 # --- Constants ---
@@ -30,13 +32,13 @@ class BasicAgent:
         self.llm = HuggingFaceInferenceAPI(model_name="Qwen/Qwen2.5-Coder-32B-Instruct")
         print("BasicAgent initialized.")
 
-    def __call__(self, question: str, task_id: str, file_name: str) -> str:
+    async def __call__(self, question: str, task_id: str, file_name: str) -> str:
         google_search = GoogleSearch().google_search
         google_image_search = GoogleSearch().google_image_search
 
         get_and_process_question_attachment = APIProcessor(
             file_url=DEFAULT_API_URL + "/files/" + task_id, file_name=file_name
         ).get_and_process_attachment
 
         agent = AgentWorkflow.from_tools_or_functions(
             [
@@ -50,10 +52,22 @@ class BasicAgent:
             llm=self.llm,
             system_prompt=SYSTEM_PROMPT,
         )
+        ctx = Context(agent)
+        handler = agent.run(question, ctx=ctx)
+        async for ev in handler.stream_events():
+            if isinstance(ev, ToolCallResult):
+                print("")
+                print(
+                    "Called tool: ", ev.tool_name, ev.tool_kwargs, "=>", ev.tool_output
+                )
+            elif isinstance(ev, AgentStream):  # showing the thought process
+                print(ev.delta, end="", flush=True)
 
-        response =
+        response = await handler
 
-
+        ##format final answer
+        final_answer = format_final_answer(question, response)
+        return final_answer
 
 
 def run_and_submit_all(profile: gr.OAuthProfile | None):
@@ -226,6 +240,36 @@ with gr.Blocks() as demo:
 
     run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
+
+# async def main():
+#     agent = BasicAgent()
+#     api_url = DEFAULT_API_URL
+#     questions_url = f"{api_url}/questions"
+#     print(f"Fetching questions from: {questions_url}")
+
+#     response = requests.get(questions_url, timeout=15)
+#     response.raise_for_status()
+#     questions_data = response.json()
+
+#     # 3. Run your Agent
+#     results_log = []
+#     answers_payload = []
+#     print(f"Running agent on {len(questions_data)} questions...")
+#     item = questions_data[0]
+#     task_id = item.get("task_id")
+#     question_text = item.get("question")
+#     file_name = item.get("file_name")
+#     submitted_answer = await agent(question_text, task_id, file_name)
+#     answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+#     results_log.append(
+#         {
+#             "Task ID": task_id,
+#             "Question": question_text,
+#             "Submitted Answer": submitted_answer,
+#         }
+#     )
+
+
 if __name__ == "__main__":
     print("\n" + "-" * 30 + " App Starting " + "-" * 30)
     # Check for SPACE_HOST and SPACE_ID at startup for information
@@ -253,3 +297,5 @@ if __name__ == "__main__":
 
     print("Launching Gradio Interface for Basic Agent Evaluation...")
     demo.launch(debug=True, share=False)
+
+    # asyncio.run(main())
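Note on the async change: BasicAgent.__call__ is now a coroutine, so any synchronous caller (such as the Gradio callback run_and_submit_all, which is not shown changed in this commit) needs an event-loop bridge. A minimal driver sketch, assuming the BasicAgent above is importable from app.py and using placeholder task values:

    import asyncio

    from app import BasicAgent  # assumption: this Space's app.py is importable

    async def main() -> None:
        agent = BasicAgent()
        # placeholder task_id/file_name, not a real task from the scoring API
        answer = await agent("What is 2 + 2?", task_id="demo", file_name="")
        print(answer)

    if __name__ == "__main__":
        asyncio.run(main())  # same bridge as the commented-out asyncio.run(main()) above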
requirements.txt
CHANGED
@@ -7,6 +7,6 @@ pandas
 aiohttp
 Pillow
 yt-dlp
-
+html2text
 llama-index-utils-workflow
 llama-index-llms-huggingface-api
tools.py
CHANGED
@@ -8,8 +8,9 @@ import os
 import io
 import yt_dlp
 import re
-
+import html2text
 from requests.exceptions import RequestException
+from bs4 import BeautifulSoup
 
 
 def transcribe_image_from_link(image_link: str) -> str:
@@ -49,6 +50,13 @@ def transcribe_image_from_link(image_link: str) -> str:
     return transcribed_text
 
 
+def truncate_content(content: str, max_length: int = 10000) -> str:
+    if len(content) <= max_length:
+        return content
+    else:
+        return content[:max_length]
+
+
 def transcribe_webpage(website_url: str) -> str:
     """Visits website url and returns markdown of contents"""
     try:
@@ -56,21 +64,25 @@
         response = requests.get(website_url, timeout=20)
         response.raise_for_status()  # Raise an exception for bad status codes
 
-
-
+        soup = BeautifulSoup(response.text, "html.parser")
+        content_div = soup.find("div", id="mw-content-text")
+
+        if not content_div:
+            return "Main content not found."
+
+        # Only extract <p> and <table> tags
+        elements = content_div.find_all(["p", "table"])
 
-
-
-
+        # Join selected HTML chunks
+        html_subset = "".join(str(el) for el in elements)
+
+        # Convert the HTML content to Markdown
+        markdown_content = html2text.HTML2Text().handle(str(html_subset))
 
         # Remove multiple line breaks
         markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
 
-
-        f.write("\n\nMarkdown content:\n\n")
-        f.write(markdown_content)
-
-        return markdown_content
+        return truncate_content(markdown_content, 20000)
 
     except requests.exceptions.Timeout:
         return "The request timed out. Please try again later or check the URL."
@@ -218,11 +230,15 @@ if __name__ == "__main__":
     # return "https://agents-course-unit4-scoring.hf.space" + "/files/" + task_id
 
     # audio_task_processor = APIProcessor(
-    #     file_name="
-    #     file_url=get_file_api_url("
+    #     file_name="",
+    #     file_url=get_file_api_url("8e867cd7-cff9-4e6c-867a-ff5ddc2550be"),
     # )
 
     # response = audio_task_processor.get_and_process_attachment()
     # print(response)
-    result = parse_youtube_video("https://www.youtube.com/watch?v=1htKBjuUWec")
-    print(result)
+    # result = parse_youtube_video("https://www.youtube.com/watch?v=1htKBjuUWec")
+    # print(result)
+    text = transcribe_webpage(
+        "https://en.wikipedia.org/wiki/Mercedes_Sosa#Studio_albums"
+    )
+    print(text)
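Note that the new extraction path is effectively Wikipedia-specific: mw-content-text is the id MediaWiki gives the article body, so any other site will hit the "Main content not found." branch. A sketch of a more general fallback (hypothetical helper, not part of this commit), reusing the same bs4/html2text calls as transcribe_webpage:

    import html2text
    from bs4 import BeautifulSoup

    def extract_main_markdown(html: str) -> str:
        """Hypothetical helper: prefer MediaWiki's article container, else <body>."""
        soup = BeautifulSoup(html, "html.parser")
        root = soup.find("div", id="mw-content-text") or soup.body
        if root is None:
            return ""
        # Keep only paragraphs and tables, mirroring transcribe_webpage above
        html_subset = "".join(str(el) for el in root.find_all(["p", "table"]))
        return html2text.HTML2Text().handle(html_subset)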
utils.py
CHANGED
@@ -1,6 +1,7 @@
 from PIL import Image
 from io import BytesIO
 import base64
+from openai import OpenAI
 
 
 def encode_image_in_base64(image: bytes):
@@ -50,3 +51,27 @@ def process_image_for_gpt(image_bytes: bytes) -> str:
     image_bytes = replace_transparent_pixels(image_bytes)
     base64_image = encode_image_in_base64(image_bytes)
     return base64_image
+
+
+def format_final_answer(question: str, answer: str) -> str:
+    """Always call to format final answer"""
+    client = OpenAI()
+    response = client.chat.completions.create(
+        model="gpt-4o",
+        messages=[
+            {
+                "role": "system",
+                "content": """ You're tasked with reformatting an answer from an unreliable AI into the expected format as per their instructions.
+<instructions>
+You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
+</instructions>
+<question>"""
+                + question
+                + """
+Now here is their answer. Only reply with the corrected formatting
+""",
+            },
+            {"role": "user", "content": str(answer)},
+        ],
+    )
+    return response.choices[0].message.content.strip()
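format_final_answer instantiates OpenAI() with no arguments, which reads the OPENAI_API_KEY environment variable, so the Space needs that secret configured. A hypothetical usage sketch (the question and raw answer strings here are invented):

    from utils import format_final_answer

    # invented example values; a real call passes the task question and the agent's response
    question = "How many studio albums did the artist release between 2000 and 2009?"
    raw_answer = "Counting the rows of the discography table, I get six studio albums."
    print(format_final_answer(question, raw_answer))
    # expected shape, per the embedded instructions: FINAL ANSWER: 6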