Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -164,7 +164,7 @@ class ConversationBot:
|
|
164 |
self.tts = TTS(device="cpu")
|
165 |
# self.t2s = T2S(device="cuda:0")
|
166 |
# self.i2a = I2A(device="cpu")
|
167 |
-
|
168 |
# self.asr = ASR(device="cuda:0")
|
169 |
# self.inpaint = Inpaint(device="cpu")
|
170 |
#self.tts_ood = TTS_OOD(device="cuda:0")
|
@@ -191,13 +191,13 @@ class ConversationBot:
|
|
191 |
# "The input to this tool should be a comma separated string of three, representing text, note and duration sequence since User Input Text, Note and Duration Sequence are all provided."),
|
192 |
Tool(name="Synthesize Speech Given the User Input Text", func=self.tts.inference,
|
193 |
description="useful for when you want to convert a user input text into speech audio it saved it to a file."
|
194 |
-
"The input to this tool should be a string, representing the text used to be converted to speech.")
|
195 |
# Tool(name="Generate Audio From The Image", func=self.i2a.inference,
|
196 |
# description="useful for when you want to generate an audio based on an image."
|
197 |
# "The input to this tool should be a string, representing the image_path. "),
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
# Tool(name="Audio Inpainting", func=self.inpaint.show_mel_fn,
|
202 |
# description="useful for when you want to inpaint a mel spectrum of an audio and predict this audio, this tool will generate a mel spectrum and you can inpaint it, receives audio_path as input, "
|
203 |
# "The input to this tool should be a string, representing the audio_path.")]
|
|
|
164 |
self.tts = TTS(device="cpu")
|
165 |
# self.t2s = T2S(device="cuda:0")
|
166 |
# self.i2a = I2A(device="cpu")
|
167 |
+
self.a2t = A2T(device="cpu")
|
168 |
# self.asr = ASR(device="cuda:0")
|
169 |
# self.inpaint = Inpaint(device="cpu")
|
170 |
#self.tts_ood = TTS_OOD(device="cuda:0")
|
|
|
191 |
# "The input to this tool should be a comma separated string of three, representing text, note and duration sequence since User Input Text, Note and Duration Sequence are all provided."),
|
192 |
Tool(name="Synthesize Speech Given the User Input Text", func=self.tts.inference,
|
193 |
description="useful for when you want to convert a user input text into speech audio it saved it to a file."
|
194 |
+
"The input to this tool should be a string, representing the text used to be converted to speech."),
|
195 |
# Tool(name="Generate Audio From The Image", func=self.i2a.inference,
|
196 |
# description="useful for when you want to generate an audio based on an image."
|
197 |
# "The input to this tool should be a string, representing the image_path. "),
|
198 |
+
Tool(name="Generate Text From The Audio", func=self.a2t.inference,
|
199 |
+
description="useful for when you want to describe an audio in text, receives audio_path as input."
|
200 |
+
"The input to this tool should be a string, representing the audio_path.")]
|
201 |
# Tool(name="Audio Inpainting", func=self.inpaint.show_mel_fn,
|
202 |
# description="useful for when you want to inpaint a mel spectrum of an audio and predict this audio, this tool will generate a mel spectrum and you can inpaint it, receives audio_path as input, "
|
203 |
# "The input to this tool should be a string, representing the audio_path.")]
|