# uasername's picture
# Update app.py
# a3cfb1d verified
# raw
# history blame
# 3.04 kB
from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool
import datetime
import requests
import pytz
import yaml
from tools.final_answer import FinalAnswerTool
from tools.web_search import DuckDuckGoSearchTool
from tools.visit_webpage import VisitWebpageTool
from Gradio_UI import GradioUI
from kokoro import KPipeline
import soundfile as sf
import os
import numpy as np
import gradio as gr
# Build the Kokoro pipeline once at module load; text_to_speech_kokoro below reuses it.
# lang_code "a" stands for American English.
pipeline = KPipeline(lang_code="a")
# NOTE(review): the @tool decorator presumably builds the tool description from the
# docstring below, so its wording is kept unchanged -- confirm before editing it.
@tool
def text_to_speech_kokoro(text: str, voice: str = 'af_heart', speed: float = 1.0) -> str:
    """Convert text to speech using the Kokoro-82M model.
    Args:
        text (str): The text to be converted to speech.
        voice (str, optional): The voice to use for speech synthesis. Defaults to 'af_heart'.
        speed (float, optional): The speed of the speech. Defaults to 1.0.
    Returns:
        str: The path to the generated audio file.
    """
    try:
        # Collect the audio of every segment the pipeline yields; split_pattern asks it
        # to break the input on runs of newlines.
        chunks = [
            audio
            for _, _, audio in pipeline(text, voice=voice, speed=speed, split_pattern=r'\n+')
        ]
        if not chunks:
            raise ValueError("No audio generated.")

        # Join all segments into a single waveform before touching the filesystem.
        waveform = np.concatenate(chunks)

        # The result always lands in tools/output.wav, written at 24 kHz
        # (the rate Kokoro outputs at).
        out_dir = "tools"
        os.makedirs(out_dir, exist_ok=True)
        wav_path = os.path.join(out_dir, "output.wav")
        sf.write(wav_path, waveform, 24000)
        return wav_path
    except Exception as e:
        # Any failure is handed back to the caller as a message string, not raised.
        return f"Error generating speech: {e!s}"
# Tool instances handed to the agent further down.
final_answer = FinalAnswerTool()
# DuckDuckGoSearchTool is imported from both smolagents and tools.web_search; the
# tools.web_search import comes later, so that is the class instantiated here.
web_search_tool = DuckDuckGoSearchTool()
visit_webpage_tool = VisitWebpageTool()

# If the agent does not answer, the model may be overloaded; switch to another model or
# to the following Hugging Face endpoint, which also serves Qwen2.5 Coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',  # this model may be overloaded at times
    custom_role_conversions=None,
)

# Import the image-generation tool from the Hub.
# NOTE(review): trust_remote_code=True lets code from that Hub repository run locally --
# confirm the repository is trusted.
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)

# Load the prompt templates. The encoding is pinned to UTF-8 so that parsing does not
# depend on the platform's default locale encoding.
with open("prompts.yaml", "r", encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)
# Assemble the agent. Every entry in `tools` must be a name that is defined or imported
# in this module. The list used to reference get_current_time_in_timezone,
# get_random_cocktail and search_dad_jokes as well; none of them is defined or imported
# here, so evaluating the list raised NameError before the UI could start. Add them back
# once their implementations are restored.
agent = CodeAgent(
    model=model,
    tools=[  # add your tools here (don't remove final_answer)
        visit_webpage_tool,
        web_search_tool,
        final_answer,
        image_generation_tool,
        text_to_speech_kokoro,
    ],
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates,
)

# Start the Gradio front end for the agent.
GradioUI(agent).launch()