from smolagents import CodeAgent, HfApiModel, load_tool, tool
import datetime
import requests
import pytz
import yaml
from tools.final_answer import FinalAnswerTool
from tools.web_search import DuckDuckGoSearchTool
from tools.visit_webpage import VisitWebpageTool

from Gradio_UI import GradioUI

from kokoro import KPipeline
import soundfile as sf
import os
import numpy as np


# Initialize the Kokoro pipeline
pipeline = KPipeline(lang_code='a')  # 'a' stands for American English

@tool
def text_to_speech_kokoro(text: str, voice: str = 'af_heart', speed: float = 1.0) -> str:
    """Convert text to speech using the Kokoro-82M model.
    
    Args:
        text: The text to be converted to speech.
        voice: The voice to use for speech synthesis (default is 'af_heart').
        speed: The speed of the speech (default is 1.0).
    
    Returns:
        An AgentAudio object with the relative URL to the generated audio file.
    """
    try:
        # Generate speech audio
        generator = pipeline(text, voice=voice, speed=speed, split_pattern=r'\n+')
        audio_segments = []
        for _, _, audio in generator:
            audio_segments.append(audio)
        if not audio_segments:
            raise ValueError("No audio generated.")
        # Concatenate segments into one audio array
        full_audio = np.concatenate(audio_segments)
        sample_rate = 24000  # Kokoro outputs at 24 kHz
        # Ensure the output folder exists and save the file there
        os.makedirs("tools", exist_ok=True)
        filename = os.path.join("tools", "output.wav")
        sf.write(filename, full_audio, sample_rate)
        # Return an AgentAudio object pointing to the saved audio file
        from smolagents.agent_types import AgentAudio
        return AgentAudio(filename)
    except Exception as e:
        return f"Error generating speech: {str(e)}"

@tool
def search_dad_jokes(term: str) -> str:
    """A tool that searches for dad jokes containing a specific term.
    Args:
        term: The keyword to search for in dad jokes.
    """
    try:
        headers = {
            "Accept": "application/json",
            "User-Agent": "YourAppName (https://yourappurl.com)"
        }
        response = requests.get(
            "https://icanhazdadjoke.com/search",
            params={"term": term},  # let requests URL-encode the search term
            headers=headers,
            timeout=10,
        )
        response.raise_for_status()
        data = response.json()
        if data['results']:
            jokes = [joke['joke'] for joke in data['results']]
            return f"Found {len(jokes)} jokes:\n" + "\n\n".join(jokes)
        else:
            return f"No jokes found for the term '{term}'."
    except Exception as e:
        return f"Error searching for jokes: {str(e)}"
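

# Definitions for the get_current_time_in_timezone and get_random_cocktail tools that the
# agent below expects. get_current_time_in_timezone follows the standard smolagents
# first-agent template; get_random_cocktail is a sketch that assumes TheCocktailDB's
# public random-drink endpoint and its strDrink / strIngredientN / strMeasureN /
# strInstructions response fields.
@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """A tool that fetches the current local time in a specified timezone.
    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').
    """
    try:
        tz = pytz.timezone(timezone)
        local_time = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
        return f"The current local time in {timezone} is: {local_time}"
    except Exception as e:
        return f"Error fetching time for timezone '{timezone}': {str(e)}"

@tool
def get_random_cocktail() -> str:
    """A tool that fetches a random cocktail recipe from TheCocktailDB."""
    try:
        response = requests.get(
            "https://www.thecocktaildb.com/api/json/v1/1/random.php", timeout=10
        )
        response.raise_for_status()
        drink = response.json()["drinks"][0]
        # Collect the non-empty ingredient/measure pairs (strIngredient1..15 / strMeasure1..15)
        ingredients = []
        for i in range(1, 16):
            ingredient = drink.get(f"strIngredient{i}")
            if ingredient:
                measure = (drink.get(f"strMeasure{i}") or "").strip()
                ingredients.append(f"{measure} {ingredient}".strip())
        return (
            f"{drink['strDrink']}\n"
            f"Ingredients: {', '.join(ingredients)}\n"
            f"Instructions: {drink['strInstructions']}"
        )
    except Exception as e:
        return f"Error fetching cocktail: {str(e)}"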




final_answer = FinalAnswerTool()
web_search_tool = DuckDuckGoSearchTool()
visit_webpage_tool = VisitWebpageTool()



# If the agent does not answer, the model is overloaded. Use another model, or the following Hugging Face endpoint, which also serves Qwen2.5-Coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud' 

model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',  # this model may be overloaded at times
    custom_role_conversions=None,
)


# Import tool from Hub
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)

with open("prompts.yaml", 'r') as stream:
    prompt_templates = yaml.safe_load(stream)
    
agent = CodeAgent(
    model=model,
    tools=[
        visit_webpage_tool,
        web_search_tool,
        final_answer,
        image_generation_tool,
        get_current_time_in_timezone,
        get_random_cocktail,
        search_dad_jokes,
        text_to_speech_kokoro,
    ],  ## add your tools here (don't remove final answer)
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates
)


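# Launch the Gradio chat interface for the agent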
GradioUI(agent).launch()