Spaces:
Sleeping
Sleeping
First version
Browse files
- mini_agents.py +45 -0
- tools.py +63 -0
- vlm_tools.py +54 -0
mini_agents.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os

from smolagents import CodeAgent, InferenceClientModel, Tool

from tools import sort_list

# Candidate model ids per task; index 0 of each list is the one used below.
MODEL_CHOICES = {
    "audio": ["whisper-large-v3"],
    "vlm": ["Salesforce/blip-image-captioning-base", "smolvlm/vlm-base-patch14-224"],
    "code": ["gpt-4o-mini"],
}

# Looked up once so every model below shares the same credential.
_HF_TOKEN = os.getenv("HUGGINGFACE_API_KEY")

# CodeAgent needs a model object, not a bare model-name string.
# NOTE(review): "gpt-4o-mini" looks like an OpenAI model name - confirm it is
# reachable through the Hugging Face inference client or choose another id.
code_model = InferenceClientModel(
    model_id=MODEL_CHOICES["code"][0],
    token=_HF_TOKEN,
)

code_agent = CodeAgent(
    model=code_model,
    # sort_list is declared with LangChain's @tool, so it is converted to a
    # smolagents Tool before being handed to the agent.
    tools=[Tool.from_langchain(sort_list)],
    # Replaces the unsupported `verbose=True`; 2 is assumed to be the most
    # detailed log level - TODO confirm against the installed version.
    verbosity_level=2,
)

# The original also passed an api.openai.com transcription URL together with
# the Hugging Face key. That endpoint is not a chat-completion route and the
# key belongs to a different provider, so the URL is intentionally omitted.
# NOTE(review): Whisper is a speech-recognition checkpoint; presumably it
# cannot act as the LLM that drives a CodeAgent - confirm the intended design.
audio_model = InferenceClientModel(
    model_id=MODEL_CHOICES["audio"][0],
    token=_HF_TOKEN,
)

audio_agent = CodeAgent(
    model=audio_model,
    tools=[],
    verbosity_level=2,
)

# Same reasoning as for the audio model: the OpenAI image-generation URL is
# dropped rather than sent alongside the Hugging Face key.
# NOTE(review): BLIP is an image-captioning checkpoint; presumably it cannot
# act as the LLM that drives a CodeAgent - confirm the intended design.
vlm_model = InferenceClientModel(
    model_id=MODEL_CHOICES["vlm"][0],
    token=_HF_TOKEN,
)

vlm_agent = CodeAgent(
    model=vlm_model,
    tools=[],
    verbosity_level=2,
)
|
39 |
+
|
40 |
+
|
41 |
+
|
42 |
+
|
43 |
+
|
44 |
+
|
45 |
+
|
tools.py
CHANGED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_core.tools import tool
|
2 |
+
from datetime import datetime
|
3 |
+
from typing import Literal, List
|
4 |
+
from smolagents import WebSearchTool, DuckDuckGoSearchTool, VisitWebpageTool, WikipediaSearchTool
|
5 |
+
|
6 |
+
@tool
|
7 |
+
def get_current_time(timezone: str = "America/New_York", format: str = "%Y-%m-%d %H:%M:%S") -> str:
    """
    Get the current time
    Args:
        timezone: The timezone to get the current time in. Example: "America/New_York"
        format: The format to return the current time in. Example: "%Y-%m-%d %H:%M:%S"
    Returns:
        The current time
    Raises:
        ValueError: If the timezone name cannot be resolved.
    """
    # Imported locally so the function does not depend on extra file-level
    # imports; the alias avoids clashing with the `timezone` parameter.
    from datetime import timezone as _fixed_offset
    from zoneinfo import ZoneInfo, ZoneInfoNotFoundError

    if timezone.upper() == "UTC":
        # UTC needs no time zone database lookup.
        tzinfo = _fixed_offset.utc
    else:
        try:
            # datetime.now() requires a tzinfo object, not the zone name itself.
            tzinfo = ZoneInfo(timezone)
        except (ZoneInfoNotFoundError, ValueError) as err:
            raise ValueError(f"Unknown timezone: {timezone!r}") from err
    return datetime.now(tzinfo).strftime(format)
|
17 |
+
|
18 |
+
@tool
|
19 |
+
def sort_list(my_list: list, order: Literal["asc", "desc", "alphabetize", "alphabetize_reverse"]):
    """
    Sort a list in ascending or descending order if the list contains numbers.
    Sort it alphabetically or alphabetically in reverse order if the list contains strings or mixed types.

    Args:
        my_list: The list to sort
        order: The order to sort the list in. Must be one of the following:
            - "asc": Sort the list in ascending order. For strings or mixed types this sorts alphabetically.
            - "desc": Sort the list in descending order. For strings or mixed types this sorts alphabetically in reverse.
            - "alphabetize": Sort the list alphabetically.
            - "alphabetize_reverse": Sort the list alphabetically in reverse order.

    Returns:
        The sorted list. Lists containing non-numeric items are returned as strings.

    Raises:
        ValueError: If my_list is not a list or order is not a supported value.
    """
    if not isinstance(my_list, list):
        raise ValueError("my_list must be a list")

    # Each accepted order maps to the `reverse` flag passed to sorted().
    reverse_for = {
        "asc": False,
        "desc": True,
        "alphabetize": False,
        "alphabetize_reverse": True,
    }
    # Validate once up front so every branch rejects bad input the same way.
    if order not in reverse_for:
        raise ValueError("order must be one of the following: asc, desc, alphabetize, alphabetize_reverse")
    reverse = reverse_for[order]

    if all(isinstance(item, (int, float)) for item in my_list):
        if order in ("asc", "desc"):
            return sorted(my_list, reverse=reverse)
        # Alphabetical order of numbers compares their text form but keeps
        # the original numeric items in the result.
        return sorted(my_list, key=str, reverse=reverse)

    # Non-numeric items cannot be compared with numbers, so everything is
    # converted to text; "asc"/"desc" fall back to alphabetical order here.
    print("This is a mixed list. Converting and sorting alphabetically.")
    return sorted((str(item) for item in my_list), reverse=reverse)
|
57 |
+
|
58 |
+
# Ready-made smolagents tools, instantiated once at import time so other
# modules can share the same objects.
(
    web_search_tool,
    duckduckgo_search_tool,
    visit_webpage_tool,
    wikipedia_search_tool,
) = (
    WebSearchTool(),
    DuckDuckGoSearchTool(),
    VisitWebpageTool(),
    WikipediaSearchTool(),
)
|
63 |
+
|
vlm_tools.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_core.tools import tool
|
2 |
+
|
3 |
+
@tool
|
4 |
+
def download_image(image_url: str) -> str:
    """
    Download an image from a url
    Args:
        image_url: The url of the image to download (http, https or data url)
    Returns:
        The image as a base64 string
    Raises:
        ValueError: If the url does not use a supported scheme.
        urllib.error.URLError: If the resource cannot be fetched.
    """
    # Imported locally so the function does not depend on extra file-level imports.
    import base64
    from urllib.parse import urlsplit
    from urllib.request import urlopen

    # The url is supplied by the agent, so refuse schemes such as file://
    # that would read local resources instead of fetching an image.
    scheme = urlsplit(image_url).scheme
    if scheme not in ("http", "https", "data"):
        raise ValueError(f"Unsupported url scheme: {scheme!r}")

    # The timeout keeps an unresponsive host from blocking the caller forever.
    with urlopen(image_url, timeout=30) as response:
        payload = response.read()

    # b64encode returns bytes; decode so callers get the documented string.
    return base64.b64encode(payload).decode("ascii")
|
15 |
+
|
16 |
+
@tool
|
17 |
+
def image_processing(image: str):
    """
    Process an image
    Args:
        image: The image to process, given in base64 format
    Returns:
        The processed image
    """
    # Placeholder: no processing step exists yet, so the result is always None.
    return None
|
27 |
+
|
28 |
+
@tool
|
29 |
+
def object_detection(image: str):
    """
    Detect objects in an image
    Args:
        image: The image to detect objects in, given in base64 format
    Returns:
        The detected objects
    """
    # Placeholder: no detector is wired in yet, so the result is always None.
    return None
|
39 |
+
|
40 |
+
@tool
|
41 |
+
def ocr_scan(image: str):
    """
    Scan an image for text
    Args:
        image: The image to scan for text, given in base64 format
    Returns:
        The text in the image
    """
    # Placeholder: no OCR engine is wired in yet, so the result is always None.
    return None
|
51 |
+
|
52 |
+
|
53 |
+
|
54 |
+
|