Spaces:
Sleeping
Sleeping
second version
Browse files- .gitignore +7 -0
- app.py +50 -45
- audio_tools.py +95 -0
- mini_agents.py +75 -13
- tools.py +173 -3
- vlm_tools.py +79 -7
.gitignore
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.venv/
|
2 |
+
__pycache__/
|
3 |
+
*.pyc
|
4 |
+
*.pyo
|
5 |
+
*.pyd
|
6 |
+
*.pyw
|
7 |
+
*.pyz
|
app.py
CHANGED
@@ -19,7 +19,7 @@ class BasicAgent:
|
|
19 |
print(f"Agent returning fixed answer: {fixed_answer}")
|
20 |
return fixed_answer
|
21 |
|
22 |
-
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
23 |
"""
|
24 |
Fetches all questions, runs the BasicAgent on them, submits all answers,
|
25 |
and displays the results.
|
@@ -62,9 +62,9 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
62 |
print(f"Error fetching questions: {e}")
|
63 |
return f"Error fetching questions: {e}", None
|
64 |
except requests.exceptions.JSONDecodeError as e:
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
except Exception as e:
|
69 |
print(f"An unexpected error occurred fetching questions: {e}")
|
70 |
return f"An unexpected error occurred fetching questions: {e}", None
|
@@ -84,8 +84,8 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
84 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
85 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
86 |
except Exception as e:
|
87 |
-
|
88 |
-
|
89 |
|
90 |
if not answers_payload:
|
91 |
print("Agent did not produce any answers to submit.")
|
@@ -98,46 +98,51 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
98 |
|
99 |
# 5. Submit
|
100 |
print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
f"Submission Successful!\n"
|
107 |
-
f"User: {result_data.get('username')}\n"
|
108 |
-
f"Overall Score: {result_data.get('score', 'N/A')}% "
|
109 |
-
f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
|
110 |
-
f"Message: {result_data.get('message', 'No message received.')}"
|
111 |
-
)
|
112 |
-
print("Submission successful.")
|
113 |
-
results_df = pd.DataFrame(results_log)
|
114 |
-
return final_status, results_df
|
115 |
-
except requests.exceptions.HTTPError as e:
|
116 |
-
error_detail = f"Server responded with status {e.response.status_code}."
|
117 |
try:
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
|
142 |
|
143 |
# --- Build Gradio Interface using Blocks ---
|
|
|
19 |
print(f"Agent returning fixed answer: {fixed_answer}")
|
20 |
return fixed_answer
|
21 |
|
22 |
+
def run_and_submit_all( profile: gr.OAuthProfile | None, mock_submission: bool = True):
|
23 |
"""
|
24 |
Fetches all questions, runs the BasicAgent on them, submits all answers,
|
25 |
and displays the results.
|
|
|
62 |
print(f"Error fetching questions: {e}")
|
63 |
return f"Error fetching questions: {e}", None
|
64 |
except requests.exceptions.JSONDecodeError as e:
|
65 |
+
print(f"Error decoding JSON response from questions endpoint: {e}")
|
66 |
+
print(f"Response text: {response.text[:500]}")
|
67 |
+
return f"Error decoding server response for questions: {e}", None
|
68 |
except Exception as e:
|
69 |
print(f"An unexpected error occurred fetching questions: {e}")
|
70 |
return f"An unexpected error occurred fetching questions: {e}", None
|
|
|
84 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
85 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
86 |
except Exception as e:
|
87 |
+
print(f"Error running agent on task {task_id}: {e}")
|
88 |
+
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
|
89 |
|
90 |
if not answers_payload:
|
91 |
print("Agent did not produce any answers to submit.")
|
|
|
98 |
|
99 |
# 5. Submit
|
100 |
print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
|
101 |
+
if mock_submission:
|
102 |
+
answer_df = pd.DataFrame(results_log, columns=["Task ID", "Question", "Submitted Answer"])
|
103 |
+
answer_df.to_csv("answers.csv", index=False)
|
104 |
+
return "Answers saved to answers.csv", answer_df
|
105 |
+
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
try:
|
107 |
+
response = requests.post(submit_url, json=submission_data, timeout=60)
|
108 |
+
response.raise_for_status()
|
109 |
+
result_data = response.json()
|
110 |
+
final_status = (
|
111 |
+
f"Submission Successful!\n"
|
112 |
+
f"User: {result_data.get('username')}\n"
|
113 |
+
f"Overall Score: {result_data.get('score', 'N/A')}% "
|
114 |
+
f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
|
115 |
+
f"Message: {result_data.get('message', 'No message received.')}"
|
116 |
+
)
|
117 |
+
print("Submission successful.")
|
118 |
+
results_df = pd.DataFrame(results_log)
|
119 |
+
return final_status, results_df
|
120 |
+
except requests.exceptions.HTTPError as e:
|
121 |
+
error_detail = f"Server responded with status {e.response.status_code}."
|
122 |
+
try:
|
123 |
+
error_json = e.response.json()
|
124 |
+
error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
|
125 |
+
except requests.exceptions.JSONDecodeError:
|
126 |
+
error_detail += f" Response: {e.response.text[:500]}"
|
127 |
+
status_message = f"Submission Failed: {error_detail}"
|
128 |
+
print(status_message)
|
129 |
+
results_df = pd.DataFrame(results_log)
|
130 |
+
return status_message, results_df
|
131 |
+
except requests.exceptions.Timeout:
|
132 |
+
status_message = "Submission Failed: The request timed out."
|
133 |
+
print(status_message)
|
134 |
+
results_df = pd.DataFrame(results_log)
|
135 |
+
return status_message, results_df
|
136 |
+
except requests.exceptions.RequestException as e:
|
137 |
+
status_message = f"Submission Failed: Network error - {e}"
|
138 |
+
print(status_message)
|
139 |
+
results_df = pd.DataFrame(results_log)
|
140 |
+
return status_message, results_df
|
141 |
+
except Exception as e:
|
142 |
+
status_message = f"An unexpected error occurred during submission: {e}"
|
143 |
+
print(status_message)
|
144 |
+
results_df = pd.DataFrame(results_log)
|
145 |
+
return status_message, results_df
|
146 |
|
147 |
|
148 |
# --- Build Gradio Interface using Blocks ---
|
audio_tools.py
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_core.tools import tool
|
2 |
+
from pydub import AudioSegment
|
3 |
+
from pyAudioAnalysis import audioSegmentation as aS
|
4 |
+
import base64
|
5 |
+
from io import BytesIO
|
6 |
+
|
7 |
+
@tool
|
8 |
+
def audio_to_base64(file_path: str, output_format: str = "wav") -> str:
    """
    Convert an audio file to base64 format
    Args:
        file_path: Path to the audio file
        output_format: Format the audio is re-encoded to before it is base64
            encoded. Default is "wav", which matches the previous behaviour.
    Returns:
        The re-encoded audio file in base64 format
    """
    # The file is decoded and re-exported, so the returned bytes are in
    # `output_format`, not necessarily the format stored on disk.
    audio = AudioSegment.from_file(file_path)

    # Export into memory so nothing has to be written back to disk.
    buffer = BytesIO()
    audio.export(buffer, format=output_format)

    return base64.b64encode(buffer.getvalue()).decode('utf-8')
|
26 |
+
|
27 |
+
@tool
|
28 |
+
def noise_reduction(audio: str, cutoff_hz: int = 3000) -> str:
    """
    Reduce high-frequency noise in an audio file
    Args:
        audio: The audio file in base64 format
        cutoff_hz: Cutoff frequency in Hz handed to the low-pass filter.
            Default is 3000, which matches the previous behaviour.
    Returns:
        The filtered audio, re-encoded as WAV, in base64 format
    """
    # Decode the base64 payload and let pydub parse it from memory.
    audio_segment = AudioSegment.from_file(BytesIO(base64.b64decode(audio)))

    # This is only a low-pass filter: content above the cutoff is attenuated,
    # noise below the cutoff is left untouched.
    denoised_audio = audio_segment.low_pass_filter(cutoff_hz)

    # Encode back to base64 (always exported as WAV).
    buffer = BytesIO()
    denoised_audio.export(buffer, format="wav")
    return base64.b64encode(buffer.getvalue()).decode('utf-8')
|
47 |
+
|
48 |
+
@tool
|
49 |
+
def audio_segmentation(audio: str, segment_length: int = 30) -> list:
    """
    Segment an audio file into smaller chunks
    Args:
        audio: The audio file in base64 format
        segment_length: Length of each segment in seconds. Must be positive.
    Returns:
        List of audio segments (WAV) in base64 format. The last segment may be
        shorter than segment_length.
    Raises:
        ValueError: If segment_length is not positive.
    """
    # Validate before decoding: a zero step makes range() fail with an
    # unrelated message and a negative step silently yields no segments.
    if segment_length <= 0:
        raise ValueError("segment_length must be a positive number of seconds")

    audio_segment = AudioSegment.from_file(BytesIO(base64.b64decode(audio)))

    # The segment is sliced in milliseconds, hence the conversion.
    step_ms = segment_length * 1000

    segments = []
    for start_ms in range(0, len(audio_segment), step_ms):
        buffer = BytesIO()
        audio_segment[start_ms:start_ms + step_ms].export(buffer, format="wav")
        segments.append(base64.b64encode(buffer.getvalue()).decode('utf-8'))

    return segments
|
71 |
+
|
72 |
+
@tool
|
73 |
+
def speaker_diarization(audio: str, num_speakers: int = 2) -> list:
    """
    Diarize an audio file into speakers
    Args:
        audio: The audio file in base64 format
        num_speakers: Number of speakers passed to the diarizer. Default is 2,
            which matches the previous behaviour.
    Returns:
        List of (index, flag) tuples, one per element of the flags sequence
        returned by the diarizer
    """
    import os
    import tempfile

    audio_data = base64.b64decode(audio)

    # The diarizer takes a file path. Use a unique temporary file instead of a
    # fixed "temp_audio.wav" in the working directory so concurrent calls do
    # not overwrite each other, and remove it afterwards.
    # NOTE(review): the bytes are written as-is with a .wav suffix; this
    # assumes the base64 payload already is WAV data - confirm with callers.
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    try:
        with tmp:
            tmp.write(audio_data)

        # Perform speaker diarization; only the flags are used below.
        [flags, _classes, _centers] = aS.speakerDiarization(tmp.name, num_speakers)
    finally:
        os.remove(tmp.name)

    # Pair every flag with its position in the sequence.
    return list(enumerate(flags))
|
mini_agents.py
CHANGED
@@ -1,17 +1,17 @@
|
|
1 |
from smolagents import CodeAgent, InferenceClientModel
|
2 |
-
from tools import sort_list
|
|
|
|
|
|
|
3 |
import os
|
4 |
|
5 |
MODEL_CHOICES = {
|
6 |
"audio": ["whisper-large-v3"],
|
7 |
-
"vlm": ["
|
8 |
-
"code": ["
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
tools=[sort_list],
|
13 |
-
verbose=True
|
14 |
-
)
|
15 |
|
16 |
audio_model = InferenceClientModel(
|
17 |
model=MODEL_CHOICES["audio"][0],
|
@@ -21,8 +21,11 @@ audio_model = InferenceClientModel(
|
|
21 |
|
22 |
audio_agent = CodeAgent(
|
23 |
model=audio_model,
|
24 |
-
tools=[],
|
25 |
-
verbose=True
|
|
|
|
|
|
|
26 |
)
|
27 |
|
28 |
vlm_model = InferenceClientModel(
|
@@ -33,13 +36,72 @@ vlm_model = InferenceClientModel(
|
|
33 |
|
34 |
vlm_agent = CodeAgent(
|
35 |
model=vlm_model,
|
36 |
-
tools=[],
|
37 |
-
verbose=True
|
|
|
|
|
|
|
38 |
)
|
39 |
|
|
|
|
|
|
|
|
|
|
|
40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
|
|
|
|
|
|
|
|
|
|
42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
from smolagents import CodeAgent, InferenceClientModel
|
2 |
+
from tools import sort_list, operate_two_numbers, convert_number
|
3 |
+
from tools import to_dataframe, to_json, get_dataframe_data, get_dataframe_column, get_dataframe_row, get_dataframe_groupby
|
4 |
+
from vlm_tools import download_image, image_processing, object_detection, ocr_scan
|
5 |
+
from audio_tools import audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization
|
6 |
import os
|
7 |
|
8 |
MODEL_CHOICES = {
|
9 |
"audio": ["whisper-large-v3"],
|
10 |
+
"vlm": ["Qwen/Qwen2.5-VL-7B-Instruct"],
|
11 |
+
"code": ["Qwen/Qwen2.5-Coder-32B-Instruct"],
|
12 |
+
"arithmetic": ["Qwen/Qwen2.5-Coder-7B-Instruct"],
|
13 |
+
"pandas": ["Qwen/Qwen2.5-Coder-7B-Instruct"]
|
14 |
+
}
|
|
|
|
|
|
|
15 |
|
16 |
audio_model = InferenceClientModel(
|
17 |
model=MODEL_CHOICES["audio"][0],
|
|
|
21 |
|
22 |
# Worker agent for audio tasks; the description is the text a managing agent
# sees when deciding whether to delegate to it.
# NOTE(review): `verbose` and a name containing a space may not be accepted by
# every smolagents version - confirm against the installed release.
audio_agent = CodeAgent(
    model=audio_model,
    tools=[audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization],
    verbose=True,
    max_steps=4,
    name="Audio Agent",
    description="This agent is responsible for processing audio, transcribing audio and extracting text from it."
)
|
30 |
|
31 |
vlm_model = InferenceClientModel(
|
|
|
36 |
|
37 |
vlm_agent = CodeAgent(
|
38 |
model=vlm_model,
|
39 |
+
tools=[download_image, image_processing, object_detection, ocr_scan],
|
40 |
+
verbose=True,
|
41 |
+
max_steps=4,
|
42 |
+
name="VLM Agent",
|
43 |
+
description="This agent is responsible for downloading images, processing images, detecting objects in them and extracting text from them."
|
44 |
)
|
45 |
|
46 |
+
arithmetic_model = InferenceClientModel(
|
47 |
+
model=MODEL_CHOICES["arithmetic"][0],
|
48 |
+
api_key=os.getenv("HUGGINGFACE_API_KEY"),
|
49 |
+
api_url="https://api.openai.com/v1/chat/completions"
|
50 |
+
)
|
51 |
|
52 |
+
arithmetic_agent = CodeAgent(
|
53 |
+
model=arithmetic_model,
|
54 |
+
tools=[operate_two_numbers, convert_number],
|
55 |
+
verbose=True,
|
56 |
+
max_steps=4,
|
57 |
+
name="Arithmetic Agent",
|
58 |
+
description="This agent is responsible for performing arithmetic operations on two numbers."
|
59 |
+
)
|
60 |
|
61 |
+
pandas_model = InferenceClientModel(
|
62 |
+
model=MODEL_CHOICES["pandas"][0],
|
63 |
+
api_key=os.getenv("HUGGINGFACE_API_KEY"),
|
64 |
+
api_url="https://api.openai.com/v1/chat/completions"
|
65 |
+
)
|
66 |
|
67 |
+
pandas_agent = CodeAgent(
|
68 |
+
model=pandas_model,
|
69 |
+
tools=[to_dataframe, to_json, get_dataframe_data, get_dataframe_column, get_dataframe_row, get_dataframe_groupby],
|
70 |
+
verbose=True,
|
71 |
+
max_steps=4,
|
72 |
+
name="Pandas Agent",
|
73 |
+
description="This agent is responsible for converting data to a dataframe, performing pandas operations on such dataframe and converting the dataframe back to a json or a csv file."
|
74 |
+
)
|
75 |
|
76 |
+
# Shared model for the three manager agents. The worker agents in this file
# receive an InferenceClientModel instance as `model`; the managers were given
# the bare model-name string instead, which is not a model object.
# NOTE(review): `model_id` is the parameter name in the smolagents docs, while
# the sibling models here pass `model=` - confirm against the installed version.
# NOTE(review): the sibling models also set an OpenAI `api_url` together with a
# Hugging Face key; that pairing looks unintended and is not repeated here.
code_model = InferenceClientModel(
    model_id=MODEL_CHOICES["code"][0],
    api_key=os.getenv("HUGGINGFACE_API_KEY"),
)

# NOTE(review): `verbose`, `planning_steps` and names containing spaces are kept
# as written; they may be rejected by some smolagents versions - confirm.

# Routes audio and image work to the matching worker agent.
multimodal_manager = CodeAgent(
    model=code_model,
    managed_agents=[audio_agent, vlm_agent],
    tools=[sort_list],
    verbose=True,
    max_steps=8,
    planning_steps=4,
    name="Multimodal Manager",
    description="This agent is responsible for managing the audio and vlm agents."
)

# Routes numeric and tabular work to the matching worker agent.
operation_manager = CodeAgent(
    model=code_model,
    managed_agents=[arithmetic_agent, pandas_agent],
    tools=[sort_list],
    verbose=True,
    max_steps=8,
    planning_steps=4,
    name="Operation Manager",
    description="This agent is responsible for managing the arithmetic and pandas agents."
)

# Top-level agent that delegates to the two managers above.
master_agent = CodeAgent(
    model=code_model,
    managed_agents=[multimodal_manager, operation_manager],
    tools=[sort_list],
    verbose=True,
    max_steps=16,
    planning_steps=4,
    name="Master Agent",
    description="This agent is responsible for managing the multimodal and operation managers."
)
|
tools.py
CHANGED
@@ -1,10 +1,11 @@
|
|
1 |
from langchain_core.tools import tool
|
2 |
from datetime import datetime
|
3 |
-
from typing import Literal, List
|
4 |
from smolagents import WebSearchTool, DuckDuckGoSearchTool, VisitWebpageTool, WikipediaSearchTool
|
|
|
5 |
|
6 |
@tool
|
7 |
-
def get_current_time(timezone: str = "America/New_York", format: str = "%Y-%m-%d %H:%M:%S"):
|
8 |
"""
|
9 |
Get the current time
|
10 |
Args:
|
@@ -16,7 +17,7 @@ def get_current_time(timezone: str = "America/New_York", format: str = "%Y-%m-%d
|
|
16 |
return datetime.now(timezone).strftime(format)
|
17 |
|
18 |
@tool
|
19 |
-
def sort_list(my_list: List[int], order: Literal["asc", "desc", "alphabetize", "alphabetize_reverse"]):
|
20 |
"""
|
21 |
Sort a list in ascending or descending order if the list contains numbers.
|
22 |
Sort it in alphabetically or alphabetically in reverse order if the list contains strings or mixed types.
|
@@ -61,3 +62,172 @@ duckduckgo_search_tool = DuckDuckGoSearchTool()
|
|
61 |
visit_webpage_tool = VisitWebpageTool()
|
62 |
wikipedia_search_tool = WikipediaSearchTool()
|
63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
from langchain_core.tools import tool
|
2 |
from datetime import datetime
|
3 |
+
from typing import Literal, List, Union
|
4 |
from smolagents import WebSearchTool, DuckDuckGoSearchTool, VisitWebpageTool, WikipediaSearchTool
|
5 |
+
import pandas as pd
|
6 |
|
7 |
@tool
|
8 |
+
def get_current_time(timezone: str = "America/New_York", format: str = "%Y-%m-%d %H:%M:%S")->str:
|
9 |
"""
|
10 |
Get the current time
|
11 |
Args:
|
|
|
17 |
return datetime.now(timezone).strftime(format)
|
18 |
|
19 |
@tool
|
20 |
+
def sort_list(my_list: List[int], order: Literal["asc", "desc", "alphabetize", "alphabetize_reverse"])->List[int]:
|
21 |
"""
|
22 |
Sort a list in ascending or descending order if the list contains numbers.
|
23 |
Sort it in alphabetically or alphabetically in reverse order if the list contains strings or mixed types.
|
|
|
62 |
visit_webpage_tool = VisitWebpageTool()
|
63 |
wikipedia_search_tool = WikipediaSearchTool()
|
64 |
|
65 |
+
@tool
|
66 |
+
def operate_two_numbers(num1: float, num2: float, operation: Literal["add", "subtract", "multiply", "divide", "power", "modulo"], decimal_places: int = 2)->float:
    """
    Operate on two numbers
    Args:
        num1: The first number to operate on. Must be a float.
        num2: The second number to operate on. Must be a float.
        operation: The operation to perform. Must be one of the following:
            - "add": Add the two numbers
            - "subtract": Subtract the second number from the first
            - "multiply": Multiply the two numbers
            - "divide": Divide the first number by the second
            - "power": Raise the first number to the power of the second number
            - "modulo": Return the remainder of the division of the first number by the second number
        decimal_places: The number of decimal places to round the result to. Default is 2.
    Returns:
        The result of the operation, rounded to decimal_places
    Raises:
        ValueError: If operation is not supported, or the result is not a real number.
        ZeroDivisionError: If the operation divides by zero.
    """
    import operator

    operations = {
        "add": operator.add,
        "subtract": operator.sub,
        "multiply": operator.mul,
        "divide": operator.truediv,
        "power": operator.pow,
        "modulo": operator.mod,
    }
    # Only look up strings so an unhashable argument still gets the ValueError.
    compute = operations.get(operation) if isinstance(operation, str) else None
    if compute is None:
        raise ValueError("operation must be one of the following: add, subtract, multiply, divide, power, modulo")

    try:
        result = compute(num1, num2)
    except ZeroDivisionError as err:
        # Same exception type as before, with the offending operands attached.
        raise ZeroDivisionError(f"{operation} is undefined for num1={num1}, num2={num2}: {err}") from err

    # A negative base with a fractional exponent yields a complex number,
    # which round() rejects with an unhelpful TypeError.
    if isinstance(result, complex):
        raise ValueError(f"{operation} of {num1} and {num2} is not a real number")

    return round(result, decimal_places)
|
97 |
+
|
98 |
+
@tool
|
99 |
+
def convert_number(orig_num: Union[float, int], operation: Literal["to_base", "type_cast"], new_base: Literal["binary", "octal", "hexadecimal", "int", "float"], decimal_places: int = 2)->Union[int, float, str]:
    """
    Convert a number to a new base or a new numeric type
    Args:
        orig_num: The number to convert. Must be a float or int.
        operation: The operation to perform. Must be one of the following:
            - "to_base": Convert the number to a new base.
            - "type_cast": Convert the number to a new type.
        new_base: The target of the conversion. Must be one of the following:
            - "binary": Convert the number to binary (with "to_base").
            - "octal": Convert the number to octal (with "to_base").
            - "hexadecimal": Convert the number to hexadecimal (with "to_base").
            - "int": Convert the number to an int (with "type_cast").
            - "float": Convert the number to a float (with "type_cast").
        decimal_places: The number of decimal places to round the result to. Default is 2. Only used if operation is "type_cast" and new_base is "float".
    Returns:
        The converted number. Base conversions return the prefixed string
        produced by bin/oct/hex (for example "0b1010").
    Raises:
        ValueError: If operation or new_base is not supported.
        TypeError: If a base conversion is requested for a non-integral float.
    """
    if operation == "to_base":
        formatters = {"binary": bin, "octal": oct, "hexadecimal": hex}
        if new_base not in formatters:
            raise ValueError("new_base must be one of the following: binary, octal, hexadecimal")
        if isinstance(orig_num, float):
            # bin/oct/hex only accept integers; allow floats such as 10.0
            # that carry an exact integer value.
            if not orig_num.is_integer():
                raise TypeError(f"cannot convert non-integral value {orig_num} to {new_base}")
            orig_num = int(orig_num)
        return formatters[new_base](orig_num)

    if operation == "type_cast":
        if new_base == "int":
            return int(orig_num)
        if new_base == "float":
            return round(float(orig_num), decimal_places)
        raise ValueError("new_base must be one of the following: int, float")

    raise ValueError("operation must be one of the following: to_base, type_cast")
|
135 |
+
|
136 |
+
@tool
|
137 |
+
def to_dataframe(data: List[dict], columns: Union[List[str], None] = None)->pd.DataFrame:
    """
    Convert a list of dictionaries to a pandas DataFrame
    Args:
        data: The rows to convert, one dictionary per row.
        columns: The columns to keep, in order. If omitted, the columns are taken from the dictionaries' keys.
    Returns:
        The pandas DataFrame built from the rows
    """
    return pd.DataFrame(data, columns=columns)
|
142 |
+
|
143 |
+
@tool
|
144 |
+
def to_json(data: pd.DataFrame)->str:
    """
    Serialize a pandas DataFrame to a JSON string
    Args:
        data: The pandas DataFrame to serialize.
    Returns:
        A JSON array string with one object per row (orient="records")
    """
    records_json = data.to_json(orient="records")
    return records_json
|
149 |
+
|
150 |
+
@tool
|
151 |
+
def get_dataframe_data(data: pd.DataFrame, column: Union[str, int], row: Union[str, int])->Union[str, int, float]:
    """
    Get a specific cell from a pandas DataFrame
    Args:
        data: The pandas DataFrame to get the data from.
        column: The column to get the data from. Must be a string or int. If int then it is the index of the column.
        row: The row to get the data from. Must be a string or int. If int then it is the index of the row.
    Returns:
        The data from the specified cell
    """
    # Translate positional selectors into labels. The previous code replaced
    # them with a whole column/row Series, which .loc cannot use as a key.
    column_label = data.columns[column] if isinstance(column, int) else column
    row_label = data.index[row] if isinstance(row, int) else row
    return data.loc[row_label, column_label]
|
166 |
+
|
167 |
+
@tool
|
168 |
+
def get_dataframe_column(data: pd.DataFrame, column: Union[str, int])->pd.Series:
    """
    Get a specific column from a pandas DataFrame
    Args:
        data: The pandas DataFrame to get the column from.
        column: The column to get the data from. Must be a string or int. If int then it is the index of the column.
    Returns:
        The data from the specified column
    """
    # iloc is positional only, so a column name has to go through loc.
    if isinstance(column, int):
        return data.iloc[:, column]
    return data.loc[:, column]
|
178 |
+
|
179 |
+
@tool
|
180 |
+
def get_dataframe_row(data: pd.DataFrame, row: Union[str, int])->pd.Series:
    """
    Get a specific row from a pandas DataFrame
    Args:
        data: The pandas DataFrame to get the row from.
        row: The row to get the data from. Must be a string or int. If int then it is the index of the row.
    Returns:
        The data from the specified row
    """
    # iloc is positional only, so an index label has to go through loc.
    if isinstance(row, int):
        return data.iloc[row, :]
    return data.loc[row, :]
|
190 |
+
|
191 |
+
@tool
|
192 |
+
def get_dataframe_groupby(data: pd.DataFrame, column: str, operation: Literal["mean", "sum", "count", "min", "max", "median", "std", "var"])->pd.DataFrame:
    """
    Group a pandas DataFrame by a specific column and perform an operation on the grouped data
    Args:
        data: The pandas DataFrame to group.
        column: The column to group the data by.
        operation: The operation to perform on the grouped data. Must be one of the following:
            - "mean": Calculate the mean of the grouped data.
            - "sum": Calculate the sum of the grouped data.
            - "count": Count the number of rows in the grouped data.
            - "min": Calculate the minimum of the grouped data.
            - "max": Calculate the maximum of the grouped data.
            - "median": Calculate the median of the grouped data.
            - "std": Calculate the standard deviation of the grouped data.
            - "var": Calculate the variance of the grouped data.
    Returns:
        The grouped data. For "mean", "median", "std" and "var" only numeric columns are aggregated.
    Raises:
        ValueError: If operation is not supported.
    """
    # These statistics are undefined for text columns; restricting them to
    # numeric columns keeps a mixed DataFrame from raising.
    numeric_only_operations = ("mean", "median", "std", "var")
    general_operations = ("sum", "count", "min", "max")

    if operation not in numeric_only_operations + general_operations:
        raise ValueError("operation must be one of the following: mean, sum, count, min, max, median, std, var")

    grouped = data.groupby(column)
    if operation in numeric_only_operations:
        return getattr(grouped, operation)(numeric_only=True)
    return getattr(grouped, operation)()
|
228 |
+
|
229 |
+
|
230 |
+
|
231 |
+
|
232 |
+
|
233 |
+
|
vlm_tools.py
CHANGED
@@ -1,3 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
from langchain_core.tools import tool
|
2 |
|
3 |
@tool
|
@@ -9,32 +16,95 @@ def download_image(image_url: str):
|
|
9 |
Returns:
|
10 |
The image as a base64 string
|
11 |
"""
|
12 |
-
|
13 |
-
image =
|
14 |
return image
|
15 |
|
16 |
@tool
|
17 |
-
def image_processing(image: str):
|
18 |
"""
|
19 |
Process an image
|
20 |
Args:
|
21 |
image: The image in base64 format to process
|
|
|
|
|
22 |
Returns:
|
23 |
The processed image
|
24 |
"""
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
return processed_image
|
27 |
|
|
|
|
|
|
|
|
|
28 |
@tool
|
29 |
-
def object_detection(image: str):
|
30 |
"""
|
31 |
Detect objects in an image
|
32 |
Args:
|
33 |
image: The image in base64 format to detect objects in
|
|
|
|
|
|
|
34 |
Returns:
|
35 |
The detected objects
|
36 |
"""
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
return detected_objects
|
39 |
|
40 |
@tool
|
@@ -46,7 +116,9 @@ def ocr_scan(image: str):
|
|
46 |
Returns:
|
47 |
The text in the image
|
48 |
"""
|
49 |
-
|
|
|
|
|
50 |
return scanned_text
|
51 |
|
52 |
|
|
|
1 |
+
import cv2
|
2 |
+
import numpy as np
|
3 |
+
import pytesseract
|
4 |
+
import requests
|
5 |
+
import base64
|
6 |
+
from io import BytesIO
|
7 |
+
from PIL import Image
|
8 |
from langchain_core.tools import tool
|
9 |
|
10 |
@tool
|
|
|
16 |
Returns:
|
17 |
The image as a base64 string
|
18 |
"""
|
19 |
+
response = requests.get(image_url)
|
20 |
+
image = base64.b64encode(response.content).decode('utf-8')
|
21 |
return image
|
22 |
|
23 |
@tool
|
24 |
+
def image_processing(image: str, brightness: float = 1.0, contrast: float = 1.0) -> str:
    """
    Adjust the brightness and contrast of an image
    Args:
        image: The image in base64 format to process
        brightness: Offset added to every pixel value (passed as beta)
        contrast: Factor every pixel value is multiplied by (passed as alpha)
    Returns:
        The processed image, re-encoded as JPEG, in base64 format
    Raises:
        ValueError: If the image cannot be decoded or the result cannot be encoded.
    """
    image_data = base64.b64decode(image)
    np_image = np.frombuffer(image_data, np.uint8)
    img = cv2.imdecode(np_image, cv2.IMREAD_COLOR)
    # The decode result is None when the bytes are not a readable image.
    if img is None:
        raise ValueError("image could not be decoded; expected a base64-encoded image file")

    # Adjust brightness and contrast: result = |contrast * pixel + brightness|.
    # NOTE(review): with the default brightness of 1.0 every pixel is shifted
    # by 1, so the defaults are not an exact no-op - confirm that is intended.
    img = cv2.convertScaleAbs(img, alpha=contrast, beta=brightness)

    encoded_ok, buffer = cv2.imencode('.jpg', img)
    if not encoded_ok:
        raise ValueError("processed image could not be encoded as JPEG")
    return base64.b64encode(buffer).decode('utf-8')
|
44 |
|
45 |
+
weights_path = "vlm_assets/yolo11n.weights"
|
46 |
+
config_path = "vlm_assets/yolo11n.cfg"
|
47 |
+
names_path = "vlm_assets/obj.names"
|
48 |
+
|
49 |
@tool
|
50 |
+
def object_detection(image: str, weights_path: str = weights_path, config_path: str = config_path, names_path: str = names_path):
    """
    Detect objects in an image
    Args:
        image: The image in base64 format to detect objects in
        weights_path: The path to the weights file
        config_path: The path to the config file
        names_path: The path to the names file
    Returns:
        The detected objects as a list of (label, confidence, [x, y, w, h]) tuples
    Raises:
        ValueError: If the image cannot be decoded.
    """
    image_data = base64.b64decode(image)
    np_image = np.frombuffer(image_data, np.uint8)
    img = cv2.imdecode(np_image, cv2.IMREAD_COLOR)
    if img is None:
        raise ValueError("image could not be decoded; expected a base64-encoded image file")
    img_height, img_width = img.shape[:2]

    # Load the network.
    # NOTE(review): the network is re-read from disk on every call; consider
    # caching it if this tool is called repeatedly.
    net = cv2.dnn.readNet(weights_path, config_path)
    layer_names = net.getLayerNames()
    # Depending on the OpenCV version the layer ids come back as an Nx1 or a
    # 1-D array; flattening handles both, whereas indexing i[0] only works
    # for the Nx1 form. The ids are 1-based, hence the "- 1".
    out_layer_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
    output_layers = [layer_names[i - 1] for i in out_layer_ids]

    # Load class labels, one per line.
    with open(names_path, 'r') as f:
        classes = [line.strip() for line in f]

    # Run the image through the network.
    blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    # Keep every candidate whose best class score is above 0.5.
    class_ids = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            # Elements 0-3 are used as the box and 5+ as the per-class scores.
            scores = detection[5:]
            class_id = int(np.argmax(scores))
            confidence = float(scores[class_id])
            if confidence > 0.5:
                # Box values are scaled by the image size and converted from
                # centre/size to top-left/size.
                center_x = int(detection[0] * img_width)
                center_y = int(detection[1] * img_height)
                w = int(detection[2] * img_width)
                h = int(detection[3] * img_height)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(confidence)
                class_ids.append(class_id)

    # Apply non-max suppression. The kept indices may be an Nx1 array, a 1-D
    # array, or an empty tuple when nothing is kept; flatten covers them all.
    indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    kept = np.asarray(indices, dtype=int).flatten()

    detected_objects = [
        (str(classes[class_ids[i]]), confidences[i], boxes[i])
        for i in kept
    ]
    return detected_objects
|
109 |
|
110 |
@tool
|
|
|
116 |
Returns:
|
117 |
The text in the image
|
118 |
"""
|
119 |
+
image_data = base64.b64decode(image)
|
120 |
+
img = Image.open(BytesIO(image_data))
|
121 |
+
scanned_text = pytesseract.image_to_string(img)
|
122 |
return scanned_text
|
123 |
|
124 |
|