Spaces:
Running
Running
File size: 3,902 Bytes
9b5b26a c19d193 6aae614 0759335 8fe992b 9b5b26a 8a10c10 d2943b1 089099f 8a10c10 dbfe74a 8a10c10 dbfe74a 8a10c10 6da24fc dbfe74a 8a10c10 6da24fc 7036d0d 6da24fc e4f3217 dbfe74a 13a73d4 8c01ffb 6aae614 0759335 ae7a494 e121372 bf6d34c 29ec968 fe328e0 13d500a 8c01ffb 9b5b26a 8c01ffb 861422e 9b5b26a 8c01ffb 8fe992b 0759335 8c01ffb 861422e 8fe992b 9b5b26a 8c01ffb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool
import datetime
import requests
import pytz
import yaml
from tools.final_answer import FinalAnswerTool
from tools.web_search import DuckDuckGoSearchTool
from tools.visit_webpage import VisitWebpageTool
from Gradio_UI import GradioUI
from kokoro import KPipeline
import soundfile as sf
import os
import numpy as np
# Build the Kokoro pipeline once at import time; the text-to-speech tool below
# reuses this single module-level instance. lang_code "a" = American English.
pipeline = KPipeline(lang_code="a")
@tool
def text_to_speech_kokoro(text: str, voice: str = 'af_heart', speed: float = 1.0) -> str:
    """Convert text to speech using the Kokoro-82M model.

    Args:
        text: The text to be converted to speech.
        voice: The voice to use for speech synthesis (default is 'af_heart').
        speed: The speed of the speech (default is 1.0).

    Returns:
        An AgentAudio object with the relative URL to the generated audio file.
    """
    # NOTE(review): the annotation says `str`, but the success path returns an
    # AgentAudio and only the failure path returns a plain string — confirm
    # which one callers rely on before tightening either side.
    output_dir = "tools"
    kokoro_sample_rate = 24000  # Kokoro outputs at 24 kHz
    try:
        # The pipeline yields 3-tuples; only the last element (audio) is used.
        # Input is split into segments on runs of newlines.
        chunks = [
            samples
            for _, _, samples in pipeline(
                text, voice=voice, speed=speed, split_pattern=r'\n+'
            )
        ]
        if len(chunks) == 0:
            raise ValueError("No audio generated.")

        # Stitch the per-segment audio into one contiguous array.
        waveform = np.concatenate(chunks)

        # Every call writes to the same file, overwriting the previous output.
        os.makedirs(output_dir, exist_ok=True)
        wav_path = os.path.join(output_dir, "output.wav")
        sf.write(wav_path, waveform, kokoro_sample_rate)

        # Imported here (inside the try) so an import failure is reported as
        # an error string like any other failure instead of propagating.
        from smolagents.agent_types import AgentAudio
        return AgentAudio("tools/output.wav")
    except Exception as err:
        # Tool boundary: report the failure as text rather than raising.
        return f"Error generating speech: {str(err)}"
@tool
def search_dad_jokes(term: str) -> str:
    """A tool that searches for dad jokes containing a specific term.

    Args:
        term: The keyword to search for in dad jokes.
    """
    headers = {
        "Accept": "application/json",
        "User-Agent": "YourAppName (https://yourappurl.com)"
    }
    try:
        # Pass the term via `params` so requests URL-encodes it; interpolating
        # it into the URL breaks on spaces, '&', '#', and similar characters.
        response = requests.get(
            "https://icanhazdadjoke.com/search",
            params={"term": term},
            headers=headers,
            timeout=10,  # never let a stalled connection hang the agent step
        )
        # Surface HTTP errors explicitly instead of trying to parse an error
        # page as a search result.
        response.raise_for_status()
        data = response.json()
        # A missing or empty "results" key is treated the same as no matches.
        jokes = [entry['joke'] for entry in data.get('results') or []]
    except Exception as e:
        # Tool boundary: report the failure as text rather than raising.
        return f"Error searching for jokes: {str(e)}"

    if not jokes:
        return f"No jokes found for the term '{term}'."
    return f"Found {len(jokes)} jokes:\n" + "\n\n".join(jokes)
# Instantiate the tools that are implemented as classes.
final_answer = FinalAnswerTool()
web_search_tool = DuckDuckGoSearchTool()
visit_webpage_tool = VisitWebpageTool()

# If the agent does not answer, the model may be overloaded; use another model
# or the following Hugging Face Endpoint, which also serves Qwen2.5 Coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',  # this model may be overloaded
    custom_role_conversions=None,
)

# Import tool from Hub
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)

# Load the prompt templates handed to the agent; the encoding is explicit so
# the result does not depend on the platform's default locale.
with open("prompts.yaml", "r", encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)

agent = CodeAgent(
    model=model,
    # Add your tools here (don't remove final_answer).
    # `get_current_time_in_timezone` and `get_random_cocktail` used to be
    # listed but are neither defined nor imported in this file, so referencing
    # them raised NameError before the UI could start. Re-add them once they
    # are defined.
    tools=[
        visit_webpage_tool,
        web_search_tool,
        final_answer,
        image_generation_tool,
        search_dad_jokes,
        text_to_speech_kokoro,
    ],
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates
)

# Start the Gradio front end for the agent.
GradioUI(agent).launch()