Spaces:
Sleeping
Sleeping
Agents' guides and prompts
Browse files
- mini_agents.py +6 -6
- prompts.yaml +3 -1
mini_agents.py
CHANGED
@@ -2,7 +2,7 @@ from smolagents import CodeAgent, InferenceClientModel
|
|
2 |
from tools import sort_list, operate_two_numbers, convert_number, load_dataframe_from_csv
|
3 |
from tools import tavily_search_tool, visit_webpage_tool
|
4 |
from tools import to_dataframe, to_json, get_dataframe_data, get_dataframe_column, get_dataframe_row, get_dataframe_groupby
|
5 |
-
from vlm_tools import
|
6 |
from audio_tools import transcribe_audio_tool, audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization
|
7 |
from community_tools import community_tools, get_youtube_transcript_from_url
|
8 |
import os
|
@@ -41,9 +41,9 @@ audio_agent = CodeAgent(
|
|
41 |
tools=[transcribe_audio_tool, audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization],
|
42 |
max_steps=6,
|
43 |
# prompt_templates=PROMPT_TEMPLATE["audio_agent"],
|
44 |
-
additional_authorized_imports=["pytube", "pydub", "pyAudioAnalysis", "base64", "io", "sklearn", "scipy", "numpy", "pandas", "json", "os", "logging", "yaml", "pyplot", "matplotlib", 'hmmlearn', 'pickle'],
|
45 |
name="audio_agent",
|
46 |
-
description="This agent is responsible for processing audio, transcribing audio and extracting text from it."
|
47 |
)
|
48 |
|
49 |
vlm_model = InferenceClientModel(
|
@@ -53,12 +53,12 @@ vlm_model = InferenceClientModel(
|
|
53 |
|
54 |
vlm_agent = CodeAgent(
|
55 |
model=vlm_model,
|
56 |
-
tools=[
|
57 |
max_steps=6,
|
58 |
# prompt_templates=PROMPT_TEMPLATE["vlm_agent"],
|
59 |
-
additional_authorized_imports=["cv2", "numpy", "pytesseract", "requests", "base64", "onnxruntime", "PIL", "io", "os", "logging", "yaml", "pyplot", "matplotlib", 'hmmlearn', 'pickle', 'youtube_dl', 'bs4'],
|
60 |
name="vlm_agent",
|
61 |
-
description="This agent is responsible for downloading images, processing images, detecting objects in them and extracting text from them."
|
62 |
)
|
63 |
|
64 |
arithmetic_model = InferenceClientModel(
|
|
|
2 |
from tools import sort_list, operate_two_numbers, convert_number, load_dataframe_from_csv
|
3 |
from tools import tavily_search_tool, visit_webpage_tool
|
4 |
from tools import to_dataframe, to_json, get_dataframe_data, get_dataframe_column, get_dataframe_row, get_dataframe_groupby
|
5 |
+
from vlm_tools import image_processing, object_detection_tool, ocr_scan_tool, extract_images_from_video, get_image_from_file_path, get_video_from_file_path
|
6 |
from audio_tools import transcribe_audio_tool, audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization
|
7 |
from community_tools import community_tools, get_youtube_transcript_from_url
|
8 |
import os
|
|
|
41 |
tools=[transcribe_audio_tool, audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization],
|
42 |
max_steps=6,
|
43 |
# prompt_templates=PROMPT_TEMPLATE["audio_agent"],
|
44 |
+
additional_authorized_imports=["pytube", "pytube3", "youtube_dl", "pydub", "pyAudioAnalysis", "base64", "io", "sklearn", "scipy", "numpy", "pandas", "json", "os", "logging", "yaml", "pyplot", "matplotlib", 'hmmlearn', 'pickle'],
|
45 |
name="audio_agent",
|
46 |
+
description="This agent is responsible for processing audio, transcribing audio and extracting text from it. It cannot process videos."
|
47 |
)
|
48 |
|
49 |
vlm_model = InferenceClientModel(
|
|
|
53 |
|
54 |
vlm_agent = CodeAgent(
|
55 |
model=vlm_model,
|
56 |
+
tools=[image_processing, object_detection_tool, ocr_scan_tool, extract_images_from_video, get_image_from_file_path, get_video_from_file_path],
|
57 |
max_steps=6,
|
58 |
# prompt_templates=PROMPT_TEMPLATE["vlm_agent"],
|
59 |
+
additional_authorized_imports=["pytube", "pytube3", "youtube_dl", "cv2", "numpy", "pytesseract", "requests", "base64", "onnxruntime", "PIL", "io", "os", "logging", "yaml", "pyplot", "matplotlib", 'hmmlearn', 'pickle', 'youtube_dl', 'bs4'],
|
60 |
name="vlm_agent",
|
61 |
+
description="This agent is responsible for downloading images or videos, processing images or videos, detecting objects in them and extracting text from them. It cannot process audio."
|
62 |
)
|
63 |
|
64 |
arithmetic_model = InferenceClientModel(
|
prompts.yaml
CHANGED
@@ -192,7 +192,8 @@ system_prompt: |-
|
|
192 |
7. Never create any notional variables in your code, as having these in your logs will derail you from the true variables.
|
193 |
8. You can use imports in your code, but only from the following list of modules: {{authorized_imports}}
|
194 |
9. The state persists between code executions: so if in one step you've created variables or imported modules, these will all persist.
|
195 |
-
10.
|
|
|
196 |
|
197 |
Now Begin!
|
198 |
planning:
|
@@ -320,6 +321,7 @@ managed_agent:
|
|
320 |
{{task}}
|
321 |
---
|
322 |
You're helping your manager solve a wider task: so make sure to not provide a one-line answer, but give as much information as possible to give them a clear understanding of the answer.
|
|
|
323 |
|
324 |
Your final_answer WILL HAVE to contain these parts:
|
325 |
### 1. Task outcome (short version):
|
|
|
192 |
7. Never create any notional variables in your code, as having these in your logs will derail you from the true variables.
|
193 |
8. You can use imports in your code, but only from the following list of modules: {{authorized_imports}}
|
194 |
9. The state persists between code executions: so if in one step you've created variables or imported modules, these will all persist.
|
195 |
+
10. Some questions might require you to use audio or video files. The file path is often mentioned at the end of the task description (example: abcxyz.mp3). You must pass this file path to your managed agents for them to use as arguments to their tools.
|
196 |
+
11. Don't give up! You're in charge of solving the task, not providing directions to solve it.
|
197 |
|
198 |
Now Begin!
|
199 |
planning:
|
|
|
321 |
{{task}}
|
322 |
---
|
323 |
You're helping your manager solve a wider task: so make sure to not provide a one-line answer, but give as much information as possible to give them a clear understanding of the answer.
|
324 |
+
Your manager may pass you a file path to an audio or video file. You must use this file path as an argument to your tools.
|
325 |
|
326 |
Your final_answer WILL HAVE to contain these parts:
|
327 |
### 1. Task outcome (short version):
|