Add file support and youtube transcript 2nd choice
- .gitattributes +4 -0
- app.py +5 -0
- audio_tools.py +8 -2
- community_tools.py +15 -4
- load_data.py +8 -0
- mini_agents.py +4 -4
- requirements.txt +1 -0
- utils.py +71 -0
- vlm_tools.py +21 -0
.gitattributes
CHANGED
```diff
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.mp3 filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.pptx filter=lfs diff=lfs merge=lfs -text
```

app.py
CHANGED
```diff
@@ -3,6 +3,7 @@ import gradio as gr
 import requests
 import pandas as pd
 from mini_agents import master_agent
+from utils import get_full_file_path
 
 # (Keep Constants as is)
 # --- Constants ---
@@ -77,10 +78,14 @@ def run_and_submit_all( profile: gr.OAuthProfile | None, mock_submission: bool =
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
+        file_path = get_full_file_path(task_id)
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
+            question_text = question_text + "\n\nHere is the task_id for this question: " + task_id
+            if file_path:
+                question_text = question_text + f"\n\nHere is also the path to the file for the task (file name matches with task ID and is not in plain English): {file_path}"
             submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
```

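For reference, a standalone sketch of the prompt augmentation this hunk introduces; the task_id and question below are illustrative placeholders, and `get_full_file_path` comes from the new `utils.py`:

```python
# Illustrative sketch of the prompt augmentation added in app.py (placeholder values).
from utils import get_full_file_path

task_id = "32102e3e-d12a-4209-9163-7b3a104efe5d"          # placeholder task_id
question_text = "What is the final figure in the attached spreadsheet?"  # placeholder question

file_path = get_full_file_path(task_id)   # None when the task has no attached file
question_text += "\n\nHere is the task_id for this question: " + task_id
if file_path:
    question_text += (
        "\n\nHere is also the path to the file for the task "
        f"(file name matches with task ID and is not in plain English): {file_path}"
    )
```
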
audio_tools.py
CHANGED
```diff
@@ -43,8 +43,14 @@ def audio_to_base64(file_path_or_key: str, state: dict) -> str:
         file_path = file_path_or_key
 
     # Load the audio file
-
-
+    try:
+        audio = AudioSegment.from_file(file_path)
+    except Exception as e:
+        current_file_path = os.path.abspath(__file__)
+        current_file_dir = os.path.dirname(current_file_path)
+        file_path = os.path.join(current_file_dir, file_path.replace("Final_Assignment_Template", ""))
+        audio = AudioSegment.from_file(file_path)
+
     # Export the audio to a BytesIO object
     buffer = BytesIO()
     audio.export(buffer, format="wav")  # You can change the format if needed
```

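The same fallback reappears in `vlm_tools.py` below. As a rough standalone sketch (the helper name is illustrative, not part of the commit), the retry boils down to re-resolving the path relative to the module's directory; this sketch also strips the leading separator left behind by `str.replace`, since `os.path.join` discards earlier components when handed an absolute path:

```python
import os

def resolve_repo_relative(file_path: str, repo_prefix: str = "Final_Assignment_Template") -> str:
    """Return file_path if it exists, otherwise retry relative to this module's directory."""
    if os.path.exists(file_path):
        return file_path
    module_dir = os.path.dirname(os.path.abspath(__file__))
    # Drop the repo prefix and any leading separator so os.path.join keeps module_dir.
    relative = file_path.replace(repo_prefix, "").lstrip("/\\")
    return os.path.join(module_dir, relative)
```
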
community_tools.py
CHANGED
```diff
@@ -2,6 +2,7 @@ from langchain_google_community import GooglePlacesTool
 from langchain_community.agent_toolkits.load_tools import load_tools
 from langchain_community.document_loaders import YoutubeLoader
 from smolagents.tools import Tool, tool
+from youtube_transcript_api import YouTubeTranscriptApi
 
 google_map_tool = Tool.from_langchain(GooglePlacesTool())
 
@@ -24,7 +25,17 @@ def get_youtube_transcript_from_url(video_url: str)->str:
         The transcript of the YouTube video as a string
     """
     video_id = video_url.split("=")[1]
-
-
-
-
+    try:
+        ytt_api = YouTubeTranscriptApi()
+        fetched_transcript = ytt_api.fetch(video_id)
+
+        # is iterable
+        transcript = ""
+        for snippet in fetched_transcript:
+            transcript += f"{snippet['text']}\n"
+        return transcript
+    except Exception as e:
+        youtube_loader = YoutubeLoader(video_id=video_id)
+        docs = youtube_loader.load()
+        transcript = docs[0].page_content
+        return transcript
```

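For a quick local check of the new primary path (not part of the commit): with youtube-transcript-api 1.x, `fetch()` returns snippet objects whose text lives on a `.text` attribute, so a minimal direct call looks roughly like this (the URL is a placeholder):

```python
# Minimal check of the youtube-transcript-api 1.x flow used above; the URL is a placeholder.
# Note: fetched snippets expose their text as an attribute (snippet.text).
from youtube_transcript_api import YouTubeTranscriptApi

video_url = "https://www.youtube.com/watch?v=VIDEO_ID"   # placeholder
video_id = video_url.split("=")[1]

ytt_api = YouTubeTranscriptApi()
fetched_transcript = ytt_api.fetch(video_id)

transcript = "\n".join(snippet.text for snippet in fetched_transcript)
print(transcript[:200])
```
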
load_data.py
ADDED
```diff
@@ -0,0 +1,8 @@
+import datasets
+
+def download_dataset(dataset_name: str, name: str):
+    dataset = datasets.load_dataset(dataset_name, name, trust_remote_code=True)
+    return dataset
+
+dataset = download_dataset("gaia-benchmark/GAIA", "2023_all")
+dataset.save_to_disk("GAIA_2023_all")
```

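Once the script has run, the saved copy can be reloaded without re-downloading; the download itself assumes an environment authenticated with Hugging Face, since GAIA is a gated dataset. A minimal reload sketch:

```python
# Reload the locally saved copy produced by load_data.py (no re-download needed).
import datasets

dataset = datasets.load_from_disk("GAIA_2023_all")
print(dataset)                                  # the saved GAIA splits
print(dataset["validation"][0]["task_id"])      # assumes a "validation" split with a task_id column
```
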
mini_agents.py
CHANGED
```diff
@@ -2,7 +2,7 @@ from smolagents import CodeAgent, InferenceClientModel
 from tools import sort_list, operate_two_numbers, convert_number, load_dataframe_from_csv
 from tools import tavily_search_tool, visit_webpage_tool
 from tools import to_dataframe, to_json, get_dataframe_data, get_dataframe_column, get_dataframe_row, get_dataframe_groupby
-from vlm_tools import download_image, image_processing, object_detection_tool, ocr_scan_tool, extract_frames_from_video
+from vlm_tools import download_image, image_processing, object_detection_tool, ocr_scan_tool, extract_frames_from_video, get_image_from_file
 from audio_tools import transcribe_audio_tool, audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization
 from community_tools import community_tools, get_youtube_transcript_from_url
 import os
@@ -53,10 +53,10 @@ vlm_model = InferenceClientModel(
 
 vlm_agent = CodeAgent(
     model=vlm_model,
-    tools=[download_image, image_processing, object_detection_tool, ocr_scan_tool, extract_frames_from_video],
+    tools=[download_image, image_processing, object_detection_tool, ocr_scan_tool, extract_frames_from_video, get_image_from_file],
     max_steps=6,
     # prompt_templates=PROMPT_TEMPLATE["vlm_agent"],
-    additional_authorized_imports=["cv2", "numpy", "pytesseract", "requests", "base64", "onnxruntime", "PIL", "io"],
+    additional_authorized_imports=["cv2", "numpy", "pytesseract", "requests", "base64", "onnxruntime", "PIL", "io", "os", "logging", "yaml", "pyplot", "matplotlib", 'hmmlearn', 'pickle'],
     name="vlm_agent",
     description="This agent is responsible for downloading images, processing images, detecting objects in them and extracting text from them."
 )
@@ -127,7 +127,7 @@ master_agent = CodeAgent(
     tools=[sort_list, get_youtube_transcript_from_url, *community_tools, tavily_search_tool, visit_webpage_tool],
     add_base_tools=True,
     max_steps=20,
-    additional_authorized_imports=["math", "pandas", "json", "numpy", "io", "os", "logging", "yaml", "pyplot", "matplotlib", 'hmmlearn', 'pickle', 'sklearn', 'scipy', 'datetime', 'typing'],
+    additional_authorized_imports=["math", "pandas", "json", "numpy", "io", "os", "logging", "yaml", "pyplot", "matplotlib", 'hmmlearn', 'pickle', 'sklearn', 'scipy', 'datetime', 'typing', 'markdownify', 'requests', 'json'],
     verbosity_level=logging.INFO,
     planning_interval=4,
     prompt_templates=PROMPT_TEMPLATE["master_agent"],
```

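As a quick smoke test that the wider tool set and import allow-list are picked up, the master agent can be exercised directly; smolagents `CodeAgent` exposes a `run()` method (the question below is a placeholder, and the call needs the same API keys the Space uses):

```python
# Illustrative smoke test for the updated master agent (placeholder question).
from mini_agents import master_agent

question = "Summarize the transcript of https://www.youtube.com/watch?v=VIDEO_ID"  # placeholder
answer = master_agent.run(question)
print(answer)
```
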
requirements.txt
CHANGED
```diff
@@ -65,6 +65,7 @@ langchain-text-splitters==0.3.8
 langsmith==0.3.42
 lxml==5.4.0
 markdown-it-py==3.0.0
+markdownify==1.1.0
 MarkupSafe==3.0.2
 marshmallow==3.26.1
 matplotlib==3.10.3
```

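markdownify backs the new `'markdownify'` entry in the master agent's authorized imports; roughly, it converts HTML into Markdown:

```python
# Rough usage sketch of markdownify (the HTML snippet is arbitrary).
from markdownify import markdownify as md

html = '<p>See the <a href="https://example.com">source</a> for <b>details</b>.</p>'
print(md(html))   # -> roughly: "See the [source](https://example.com) for **details**."
```
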
utils.py
ADDED
```diff
@@ -0,0 +1,71 @@
+import json
+import os
+from typing import Optional
+from smolagents.tools import tool
+
+def find_file_by_task_id(task_id: str, metadata_path: str = "Final_Assignment_Template/validation/metadata.jsonl") -> Optional[str]:
+    """
+    Search for a filename that matches a given task_id in the metadata.jsonl file.
+
+    Args:
+        task_id (str): The task_id to search for
+        metadata_path (str): Path to the metadata.jsonl file. Defaults to the validation directory path.
+
+    Returns:
+        Optional[str]: The filename if found, None if not found or if task_id has no associated file
+
+    Example:
+        >>> find_file_by_task_id("32102e3e-d12a-4209-9163-7b3a104efe5d")
+        "32102e3e-d12a-4209-9163-7b3a104efe5d.xlsx"
+    """
+    if not os.path.exists(metadata_path):
+        raise FileNotFoundError(f"Metadata file not found at {metadata_path}")
+
+    with open(metadata_path, 'r', encoding='utf-8') as f:
+        for line in f:
+            try:
+                data = json.loads(line.strip())
+                if data.get('task_id') == task_id:
+                    filename = data.get('file_name', '')
+                    return filename if filename else None
+            except json.JSONDecodeError:
+                continue
+
+    return None
+
+def get_full_file_path(task_id: str, base_dir: str = "Final_Assignment_Template/validation") -> Optional[str]:
+    """
+    Get the full file path for a given task_id if it exists.
+
+    Args:
+        task_id (str): The task_id to search for
+        base_dir (str): Base directory where files are stored. Defaults to validation directory.
+
+    Returns:
+        Optional[str]: Full path to the file if found, None if not found
+
+    Example:
+        >>> get_full_file_path("32102e3e-d12a-4209-9163-7b3a104efe5d")
+        "Final_Assignment_Template/validation/32102e3e-d12a-4209-9163-7b3a104efe5d.xlsx"
+    """
+    filename = find_file_by_task_id(task_id)
+    if not filename:
+        return None
+
+    full_path = os.path.join(base_dir, filename)
+    return full_path if os.path.exists(full_path) else None
+
+@tool
+def load_file_from_task_id(task_id: str) -> str:
+    """
+    Load a file related to a given task_id if it exists.
+    Args:
+        task_id: The task_id to load the file for
+    Returns:
+        The file content if found, None if not found
+    """
+    file_path = get_full_file_path(task_id)
+    if not file_path:
+        return "File not found"
+    with open(file_path, 'r') as file:
+        return file.read()
```

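A short usage sketch of the new helpers; the task_id is the one from the docstring examples and only resolves if the matching file is actually on disk:

```python
# Usage sketch for the helpers in utils.py (task_id taken from the docstring examples).
from utils import get_full_file_path, load_file_from_task_id

task_id = "32102e3e-d12a-4209-9163-7b3a104efe5d"

path = get_full_file_path(task_id)
print(path)   # e.g. Final_Assignment_Template/validation/<task_id>.xlsx, or None

# load_file_from_task_id opens the file in text mode, so it suits .txt/.csv style
# attachments; binary formats like .xlsx or .mp3 would need a separate reader.
print(load_file_from_task_id(task_id)[:200])
```
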
vlm_tools.py
CHANGED
```diff
@@ -4,6 +4,7 @@ import pytesseract
 import requests
 import base64
 import onnxruntime
+import os
 from io import BytesIO
 from PIL import Image
 from langchain_core.tools import tool as langchain_tool
@@ -103,6 +104,26 @@ def download_image(image_url: str)->str:
     image = base64.b64encode(response.content).decode('utf-8')
     return image
 
+@tool
+def get_image_from_file(file_path: str)->str:
+    """
+    Get an image from a file
+    Args:
+        file_path: The path to the file
+    Returns:
+        The image as a base64 string
+    """
+    try:
+        with open(file_path, 'rb') as image_file:
+            image = base64.b64encode(image_file.read()).decode('utf-8')
+    except Exception as e:
+        current_file_path = os.path.abspath(__file__)
+        current_file_dir = os.path.dirname(current_file_path)
+        file_path = os.path.join(current_file_dir, file_path.replace("Final_Assignment_Template", ""))
+        with open(file_path, 'rb') as image_file:
+            image = base64.b64encode(image_file.read()).decode('utf-8')
+    return image
+
 @tool
 def image_processing(image: str, brightness: float = 1.0, contrast: float = 1.0)->str:
     """
```

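A quick round-trip check for the new tool (the image path is a placeholder): the returned base64 string should decode back into something PIL can open:

```python
# Round-trip check for get_image_from_file (placeholder path).
import base64
from io import BytesIO

from PIL import Image
from vlm_tools import get_image_from_file

b64 = get_image_from_file("Final_Assignment_Template/validation/example.png")  # placeholder path
img = Image.open(BytesIO(base64.b64decode(b64)))
print(img.size, img.mode)
```
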