Spaces:
Running
Running
File size: 3,035 Bytes
9b5b26a c19d193 6aae614 0759335 8fe992b 9b5b26a 8a10c10 d2943b1 089099f 20d75d2 8a10c10 dbfe74a 8a10c10 a3cfb1d dbfe74a a3cfb1d 8a10c10 6da24fc 20d75d2 6da24fc 20d75d2 a3cfb1d 6da24fc dbfe74a a3cfb1d 20d75d2 a3cfb1d 13a73d4 a3cfb1d 13a73d4 8c01ffb 6aae614 0759335 ae7a494 e121372 bf6d34c 29ec968 fe328e0 13d500a 8c01ffb 9b5b26a 8c01ffb 861422e 9b5b26a 8c01ffb 8fe992b 0759335 8c01ffb 861422e 8fe992b 9b5b26a a3cfb1d 20d75d2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool
import datetime
import requests
import pytz
import yaml
from tools.final_answer import FinalAnswerTool
from tools.web_search import DuckDuckGoSearchTool
from tools.visit_webpage import VisitWebpageTool
from Gradio_UI import GradioUI
from kokoro import KPipeline
import soundfile as sf
import os
import numpy as np
import gradio as gr
# Shared Kokoro pipeline, built once when the module is imported and used by
# text_to_speech_kokoro below. lang_code 'a' stands for American English.
pipeline = KPipeline(lang_code="a")
@tool
def text_to_speech_kokoro(text: str, voice: str = 'af_heart', speed: float = 1.0) -> str:
    """Convert text to speech using the Kokoro-82M model.
    Args:
        text (str): The text to be converted to speech.
        voice (str, optional): The voice to use for speech synthesis. Defaults to 'af_heart'.
        speed (float, optional): The speed of the speech. Defaults to 1.0.
    Returns:
        str: The path to the generated audio file.
    """
    # NOTE(review): the @tool decorator presumably derives the tool's
    # description and argument help from the docstring above, so its wording is
    # left as-is; confirm before rephrasing it.
    #
    # On any failure this function does not raise: it returns a string starting
    # with "Error generating speech: " instead of a file path.
    try:
        # The module-level pipeline splits the text on runs of newlines and
        # yields one 3-tuple per piece; only the third item (the audio) is kept.
        waveforms = [
            waveform
            for _, _, waveform in pipeline(
                text, voice=voice, speed=speed, split_pattern=r'\n+'
            )
        ]
        if not waveforms:
            raise ValueError("No audio generated.")

        # Join the per-piece audio into one array before touching the disk.
        joined_audio = np.concatenate(waveforms)

        # The result always goes to tools/output.wav, so each call overwrites
        # the previous recording.
        output_dir = "tools"
        os.makedirs(output_dir, exist_ok=True)
        wav_path = os.path.join(output_dir, "output.wav")

        # Kokoro outputs at 24 kHz.
        sf.write(wav_path, joined_audio, 24000)
        return wav_path
    except Exception as err:
        return f"Error generating speech: {err!s}"
# Tool instances handed to the agent further down.
# NOTE: DuckDuckGoSearchTool is imported from both smolagents and
# tools.web_search at the top of this file; the later import wins, so the
# instance below is the tools.web_search implementation.
final_answer = FinalAnswerTool()
web_search_tool = DuckDuckGoSearchTool()
visit_webpage_tool = VisitWebpageTool()

# Language model used by the agent.
# The model below may be overloaded; if the agent does not answer, switch to
# another model or to this Hugging Face Endpoint, which also serves
# Qwen2.5 Coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
model = HfApiModel(
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
    max_tokens=2096,
    temperature=0.5,
    custom_role_conversions=None,
)

# Text-to-image tool loaded from the Hub.
# NOTE(review): trust_remote_code=True presumably lets code from that Hub
# repository run locally; keep it pointed at a repository you trust.
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
# Load the agent's prompt templates from prompts.yaml (resolved relative to the
# current working directory). The encoding is pinned to UTF-8 so the file is
# decoded the same way on every platform instead of depending on the locale's
# default encoding.
with open("prompts.yaml", "r", encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)
# Tools exposed to the agent (don't remove final_answer).
#
# get_current_time_in_timezone, get_random_cocktail and search_dad_jokes used
# to be listed here, but none of them is defined or imported anywhere in this
# file, so building the list raised NameError at startup. Add them back only
# together with their definitions.
agent_tools = [
    visit_webpage_tool,
    web_search_tool,
    final_answer,
    image_generation_tool,
    text_to_speech_kokoro,
]

# Assemble the code agent from the model, the tools and the prompt templates
# loaded above.
agent = CodeAgent(
    model=model,
    tools=agent_tools,
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates,
)

# Start the Gradio front end for the agent. This runs at module level, exactly
# as before, so the UI launches whenever this file is executed or imported.
GradioUI(agent).launch()
|