batch
Commit · ee3a553 · 1 Parent(s): 5b58cc8
app.py CHANGED
@@ -218,32 +218,52 @@ def generate_base(subject, setting, ):
     play_steps = int(frame_rate * play_steps_in_s)
 
     description = "Jenny speaks at an average pace with a calm delivery in a very confined sounding environment with clear audio quality."
-    [old lines 221-246: 26 lines removed; apart from a "for i" loop header, their contents are not recoverable from this view]
+    description = [description for _ in range(len(model_input))]
+    description_tokens = tokenizer(description, return_tensors="pt").input_ids.to(device)
+
+    # for i in range(0, len(model_input), BATCH_SIZE):
+    #     inputs = model_input[i:min(i + BATCH_SIZE, len(model_input))]
+
+    #     if len(inputs) != 0:
+    #         input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
+    story = tokenizer(model_input, return_tensors="pt", padding=True).input_ids.to(device)
+
+    speech_output = model.generate(input_ids=description_tokens, prompt_input_ids=story)
+
+    speech_output = [output.cpu().numpy() for output in speech_output]
+
+    for i, new_audio in enumerate(speech_output):
+        if i == 0:
+            gr.Info("Reading story", duration=3)
+        print(f"Sample of length: {round(new_audio.shape[0] / sampling_rate, 2)} seconds")
+        yield story, numpy_to_mp3(new_audio, sampling_rate=sampling_rate)
+
+        # print(f"{i}-th part generated")
+    # pieces += [*speech_output, silence.copy()]
+
+    # for i, sentence in enumerate(model_input):
+    #     streamer = ParlerTTSStreamer(model, device=device, play_steps=play_steps)
+
+    #     prompt = tokenizer(sentence, return_tensors="pt").to(device)
+
+    #     generation_kwargs = dict(
+    #         input_ids=inputs.input_ids,
+    #         prompt_input_ids=prompt.input_ids,
+    #         streamer=streamer,
+    #         do_sample=True,
+    #         temperature=1.0,
+    #         min_new_tokens=10,
+    #     )
+
+    #     set_seed(SEED)
+    #     thread = Thread(target=model.generate, kwargs=generation_kwargs)
+    #     thread.start()
+
+    #     for new_audio in streamer:
+    #         if i == 0:
+    #             gr.Info("Reading story", duration=3)
+    #         print(f"Sample of length: {round(new_audio.shape[0] / sampling_rate, 2)} seconds")
+    #         yield story, numpy_to_mp3(new_audio, sampling_rate=sampling_rate)
 
 
 with gr.Blocks() as block:
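Read outside the diff markup, the new batched path amounts to the sketch below. It is a minimal reconstruction under stated assumptions, not the Space's actual code: the checkpoint name parler-tts/parler_tts_mini_v0.1, the function name generate_batched, and yielding plain (sampling_rate, waveform) tuples instead of the Space's numpy_to_mp3 helper (whose definition is not part of this diff) are illustrative choices, and model_input is assumed to be a list of sentences to read aloud.

import torch
import gradio as gr
from transformers import AutoTokenizer
from parler_tts import ParlerTTSForConditionalGeneration

device = "cuda:0" if torch.cuda.is_available() else "cpu"
repo_id = "parler-tts/parler_tts_mini_v0.1"  # illustrative checkpoint; the Space may load a different one

model = ParlerTTSForConditionalGeneration.from_pretrained(repo_id).to(device)
tokenizer = AutoTokenizer.from_pretrained(repo_id)
sampling_rate = model.config.sampling_rate

def generate_batched(model_input, description):
    # One copy of the (identical) voice description per sentence, so the two
    # batches passed to generate() share the same batch dimension.
    descriptions = [description] * len(model_input)
    description_tokens = tokenizer(descriptions, return_tensors="pt").input_ids.to(device)

    # Pad the sentences to a common length so they can be generated in one batch.
    story = tokenizer(model_input, return_tensors="pt", padding=True).input_ids.to(device)

    # As in the committed code, Parler-TTS conditions on the description via
    # input_ids and on the text to be spoken via prompt_input_ids.
    speech_output = model.generate(input_ids=description_tokens, prompt_input_ids=story)
    speech_output = [output.cpu().numpy().squeeze() for output in speech_output]

    # Yield one clip per sentence. The Space converts each clip to MP3 with its
    # own numpy_to_mp3 helper and also yields the story text for a second output
    # component; this sketch yields only the (rate, array) tuple that a gr.Audio
    # output accepts directly.
    for i, waveform in enumerate(speech_output):
        if i == 0:
            gr.Info("Reading story", duration=3)
        print(f"Sample of length: {round(waveform.shape[0] / sampling_rate, 2)} seconds")
        yield sampling_rate, waveform

The design trade-off the commit makes is visible in the diff itself: the per-sentence ParlerTTSStreamer loop is left commented out, so instead of incremental playback as tokens are decoded, all sentences are generated in a single batched generate() call and the clips only become available once that batch finishes.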