Spaces:
Running
Running
Staticaliza
committed on
Commit
•
dab6fe2
1
Parent(s):
8d6ac2d
Update app.py
Browse files
app.py
CHANGED
@@ -28,12 +28,6 @@ device = (
|
|
28 |
|
29 |
print(f"Using {device} device")
|
30 |
|
31 |
-
pipe = pipeline(
|
32 |
-
"automatic-speech-recognition",
|
33 |
-
model="openai/whisper-large-v3-turbo",
|
34 |
-
torch_dtype=torch.float16,
|
35 |
-
device=device,
|
36 |
-
)
|
37 |
vocos = Vocos.from_pretrained("charactr/vocos-mel-24khz")
|
38 |
|
39 |
# --------------------- Settings -------------------- #
|
@@ -231,7 +225,6 @@ def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, cross_fa
|
|
231 |
|
232 |
print(gen_text)
|
233 |
|
234 |
-
gr.Info("Converting audio...")
|
235 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
|
236 |
aseg = AudioSegment.from_file(ref_audio_orig)
|
237 |
|
@@ -250,19 +243,6 @@ def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, cross_fa
|
|
250 |
aseg.export(f.name, format="wav")
|
251 |
ref_audio = f.name
|
252 |
|
253 |
-
if not ref_text.strip():
|
254 |
-
gr.Info("No reference text provided, transcribing reference audio...")
|
255 |
-
ref_text = pipe(
|
256 |
-
ref_audio,
|
257 |
-
chunk_length_s=30,
|
258 |
-
batch_size=128,
|
259 |
-
generate_kwargs={"task": "transcribe"},
|
260 |
-
return_timestamps=False,
|
261 |
-
)["text"].strip()
|
262 |
-
gr.Info("Finished transcription")
|
263 |
-
else:
|
264 |
-
gr.Info("Using custom reference text...")
|
265 |
-
|
266 |
# Add the functionality to ensure it ends with ". "
|
267 |
if not ref_text.endswith(". "):
|
268 |
if ref_text.endswith("."):
|
@@ -279,7 +259,6 @@ def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, cross_fa
|
|
279 |
for i, batch_text in enumerate(gen_text_batches):
|
280 |
print(f'gen_text {i}', batch_text)
|
281 |
|
282 |
-
gr.Info(f"Generating audio using {exp_name} in {len(gen_text_batches)} batches")
|
283 |
return infer_batch((audio, sr), ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration)
|
284 |
|
285 |
|
|
|
28 |
|
29 |
print(f"Using {device} device")
|
30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
vocos = Vocos.from_pretrained("charactr/vocos-mel-24khz")
|
32 |
|
33 |
# --------------------- Settings -------------------- #
|
|
|
225 |
|
226 |
print(gen_text)
|
227 |
|
|
|
228 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
|
229 |
aseg = AudioSegment.from_file(ref_audio_orig)
|
230 |
|
|
|
243 |
aseg.export(f.name, format="wav")
|
244 |
ref_audio = f.name
|
245 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
246 |
# Add the functionality to ensure it ends with ". "
|
247 |
if not ref_text.endswith(". "):
|
248 |
if ref_text.endswith("."):
|
|
|
259 |
for i, batch_text in enumerate(gen_text_batches):
|
260 |
print(f'gen_text {i}', batch_text)
|
261 |
|
|
|
262 |
return infer_batch((audio, sr), ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration)
|
263 |
|
264 |
|