lmzjms committed on
Commit
ba9b7b5
·
1 Parent(s): aff50e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -10
app.py CHANGED
@@ -160,14 +160,14 @@ class ConversationBot:
160
  self.llm = OpenAI(temperature=0, openai_api_key=openai_api_key)
161
  # self.t2i = T2I(device="cuda:0")
162
  # self.i2t = ImageCaptioning(device="cuda:0")
163
- # self.t2a = T2A(device="cpu")
164
  self.tts = TTS(device="cpu")
165
  self.t2s = T2S(device="cpu")
166
  # self.i2a = I2A(device="cpu")
167
  # self.a2t = A2T(device="cpu")
168
  # self.asr = ASR(device="cuda:0")
169
  # self.inpaint = Inpaint(device="cpu")
170
- self.tts_ood = TTS_OOD(device="cpu")
171
  self.tools = [
172
  # Tool(name="Generate Image From User Input Text", func=self.t2i.inference,
173
  # description="useful for when you want to generate an image from a user input text and it saved it to a file. like: generate an image of an object or something, or generate an image that includes some objects. "
@@ -175,14 +175,14 @@ class ConversationBot:
175
  # Tool(name="Get Photo Description", func=self.i2t.inference,
176
  # description="useful for when you want to know what is inside the photo. receives image_path as input. "
177
  # "The input to this tool should be a string, representing the image_path. "),
178
- # Tool(name="Generate Audio From User Input Text", func=self.t2a.inference,
179
- # description="useful for when you want to generate an audio from a user input text and it saved it to a file."
180
- # "The input to this tool should be a string, representing the text used to generate audio."),
181
- Tool(
182
- name="Generate human speech with style derived from a speech reference and user input text and save it to a file", func= self.tts_ood.inference,
183
- description="useful for when you want to generate speech samples with styles (e.g., timbre, emotion, and prosody) derived from a reference custom voice."
184
- "Like: Generate a speech with style transferred from this voice. The text is xxx., or speak using the voice of this audio. The text is xxx."
185
- "The input to this tool should be a comma seperated string of two, representing reference audio path and input text."),
186
  Tool(name="Generate singing voice From User Input Text, Note and Duration Sequence", func= self.t2s.inference,
187
  description="useful for when you want to generate a piece of singing voice (Optional: from User Input Text, Note and Duration Sequence) and save it to a file."
188
  "If Like: Generate a piece of singing voice, the input to this tool should be \"\" since there is no User Input Text, Note and Duration Sequence ."
 
160
  self.llm = OpenAI(temperature=0, openai_api_key=openai_api_key)
161
  # self.t2i = T2I(device="cuda:0")
162
  # self.i2t = ImageCaptioning(device="cuda:0")
163
+ self.t2a = T2A(device="cpu")
164
  self.tts = TTS(device="cpu")
165
  self.t2s = T2S(device="cpu")
166
  # self.i2a = I2A(device="cpu")
167
  # self.a2t = A2T(device="cpu")
168
  # self.asr = ASR(device="cuda:0")
169
  # self.inpaint = Inpaint(device="cpu")
170
+ # self.tts_ood = TTS_OOD(device="cpu")
171
  self.tools = [
172
  # Tool(name="Generate Image From User Input Text", func=self.t2i.inference,
173
  # description="useful for when you want to generate an image from a user input text and it saved it to a file. like: generate an image of an object or something, or generate an image that includes some objects. "
 
175
  # Tool(name="Get Photo Description", func=self.i2t.inference,
176
  # description="useful for when you want to know what is inside the photo. receives image_path as input. "
177
  # "The input to this tool should be a string, representing the image_path. "),
178
+ Tool(name="Generate Audio From User Input Text", func=self.t2a.inference,
179
+ description="useful for when you want to generate an audio from a user input text and it saved it to a file."
180
+ "The input to this tool should be a string, representing the text used to generate audio."),
181
+ # Tool(
182
+ # name="Generate human speech with style derived from a speech reference and user input text and save it to a file", func= self.tts_ood.inference,
183
+ # description="useful for when you want to generate speech samples with styles (e.g., timbre, emotion, and prosody) derived from a reference custom voice."
184
+ # "Like: Generate a speech with style transferred from this voice. The text is xxx., or speak using the voice of this audio. The text is xxx."
185
+ # "The input to this tool should be a comma seperated string of two, representing reference audio path and input text."),
186
  Tool(name="Generate singing voice From User Input Text, Note and Duration Sequence", func= self.t2s.inference,
187
  description="useful for when you want to generate a piece of singing voice (Optional: from User Input Text, Note and Duration Sequence) and save it to a file."
188
  "If Like: Generate a piece of singing voice, the input to this tool should be \"\" since there is no User Input Text, Note and Duration Sequence ."