First_agent_uasername

Sleeping

uasername committed on Feb 21

Commit

798b3c8

verified ·

1 Parent(s): 0b7ec94

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -18,6 +18,11 @@ from Code_Functions import speak_text
 from smolagents.agent_types import AgentText
 from smolagents.agent_types import AgentAudio
 @tool
 def lookup_definition(query: str) -> AgentText:
     """Fetches the definition of a word from the Dictionary API and returns it as AgentText.
@@ -69,22 +74,15 @@ def text_to_speech(text: str) -> AgentAudio:
         AgentAudio: An AgentAudio instance containing the file path to the generated audio.
     """
     from gtts import gTTS
-    import os
     AUDIO_OUTPUT_PATH = "/tmp/response.mp3"
     tts = gTTS(text=text, lang='en')
     tts.save(AUDIO_OUTPUT_PATH)
-    # Read the MP3 bytes directly
     with open(AUDIO_OUTPUT_PATH, "rb") as f:
         audio_bytes = f.read()
-    #return AgentAudio(AUDIO_OUTPUT_PATH)
-    # Return AgentAudio, but store the raw bytes
-    return AgentAudio(audio_bytes)  # Not a path anymore
 # # Define the audio output path

 from smolagents.agent_types import AgentText
 from smolagents.agent_types import AgentAudio
+import soundfile
+import io
+import librosa
+import numpy as np
 @tool
 def lookup_definition(query: str) -> AgentText:
     """Fetches the definition of a word from the Dictionary API and returns it as AgentText.
         AgentAudio: An AgentAudio instance containing the file path to the generated audio.
     """
     from gtts import gTTS
     AUDIO_OUTPUT_PATH = "/tmp/response.mp3"
     tts = gTTS(text=text, lang='en')
     tts.save(AUDIO_OUTPUT_PATH)
     with open(AUDIO_OUTPUT_PATH, "rb") as f:
         audio_bytes = f.read()
+    # Convert the MP3 bytes to a numpy array using librosa.
+    # sr=None preserves the original sample rate.
+    audio_np, sr = librosa.load(io.BytesIO(audio_bytes), sr=None)
+    return AgentAudio(audio_np)
 # # Define the audio output path