Spaces:

Staticaliza
/

Voice

Running

App Files Community

Staticaliza committed 20 days ago

Commit

dab6fe2

•

1 Parent(s): 8d6ac2d

Update app.py

Browse files

Files changed (1) hide show

app.py +0 -21

app.py CHANGED Viewed

@@ -28,12 +28,6 @@ device = (
 print(f"Using {device} device")
-pipe = pipeline(
-    "automatic-speech-recognition",
-    model="openai/whisper-large-v3-turbo",
-    torch_dtype=torch.float16,
-    device=device,
-)
 vocos = Vocos.from_pretrained("charactr/vocos-mel-24khz")
 # --------------------- Settings -------------------- #
@@ -231,7 +225,6 @@ def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, cross_fa
     print(gen_text)
-    gr.Info("Converting audio...")
     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
         aseg = AudioSegment.from_file(ref_audio_orig)
@@ -250,19 +243,6 @@ def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, cross_fa
         aseg.export(f.name, format="wav")
         ref_audio = f.name
-    if not ref_text.strip():
-        gr.Info("No reference text provided, transcribing reference audio...")
-        ref_text = pipe(
-            ref_audio,
-            chunk_length_s=30,
-            batch_size=128,
-            generate_kwargs={"task": "transcribe"},
-            return_timestamps=False,
-        )["text"].strip()
-        gr.Info("Finished transcription")
-    else:
-        gr.Info("Using custom reference text...")
     # Add the functionality to ensure it ends with ". "
     if not ref_text.endswith(". "):
         if ref_text.endswith("."):
@@ -279,7 +259,6 @@ def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, cross_fa
     for i, batch_text in enumerate(gen_text_batches):
         print(f'gen_text {i}', batch_text)
-    gr.Info(f"Generating audio using {exp_name} in {len(gen_text_batches)} batches")
     return infer_batch((audio, sr), ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration)

 print(f"Using {device} device")
 vocos = Vocos.from_pretrained("charactr/vocos-mel-24khz")
 # --------------------- Settings -------------------- #
     print(gen_text)
     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
         aseg = AudioSegment.from_file(ref_audio_orig)
         aseg.export(f.name, format="wav")
         ref_audio = f.name
     # Add the functionality to ensure it ends with ". "
     if not ref_text.endswith(". "):
         if ref_text.endswith("."):
     for i, batch_text in enumerate(gen_text_batches):
         print(f'gen_text {i}', batch_text)
     return infer_batch((audio, sr), ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration)