Commit b3d591c · Update app.py
Parent(s): 6c1ce5e
app.py CHANGED
@@ -1,41 +1,25 @@
 from svoice.separate import *
-import scipy.io as
-from scipy.io.wavfile import write
+import scipy.io.wavfile as wav
 import gradio as gr
 import os
-
-
-import
+import torch
+import soundfile as sf
+from transformers import pipeline
 from glob import glob
 load_model()
 
+device = "cuda" if torch.cuda.is_available() else "cpu"
 BASE_PATH = os.path.dirname(os.path.abspath(__file__))
 os.makedirs('input', exist_ok=True)
 os.makedirs('separated', exist_ok=True)
 
-
-
-
-# model = ORTModelForSpeechSeq2Seq.from_pretrained("openai/whisper-small", from_transformers=True)
-# speech_recognition_pipeline = pipeline(
-#     "automatic-speech-recognition",
-#     model=model,
-#     feature_extractor=processor.feature_extractor,
-#     tokenizer=processor.tokenizer,
-# )
-# os.makedirs('whisper_checkpoint', exist_ok=True)
-# model.save_pretrained("whisper_checkpoint")
-# else:
-#     model = ORTModelForSpeechSeq2Seq.from_pretrained("whisper_checkpoint", from_transformers=False)
-#     speech_recognition_pipeline = pipeline(
-#         "automatic-speech-recognition",
-#         model=model,
-#         feature_extractor=processor.feature_extractor,
-#         tokenizer=processor.tokenizer,
-#     )
-#     print("Whisper ASR model loaded.")
+print("Loading ASR model...")
+pipe = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=0 if device == "cuda" else -1)
+print("ASR model loaded!")
 
-
+def transcribe_audio(audiopath):
+    audio_input, sr = sf.read(audiopath)
+    return pipe(audio_input, sampling_rate=sr, return_tensors=False, padding=True, max_new_tokens=500)['text']
 
 def separator(audio, rec_audio, example):
     outputs= {}
@@ -44,21 +28,22 @@ def separator(audio, rec_audio, example):
     for f in glob('separated/*'):
         os.remove(f)
     if audio:
-        write('input/original.wav', audio[0], audio[1])
+        wav.write('input/original.wav', audio[0], audio[1])
     elif rec_audio:
-        write('input/original.wav', rec_audio[0], rec_audio[1])
+        wav.write('input/original.wav', rec_audio[0], rec_audio[1])
     else:
         os.system(f'cp {example} input/original.wav')
     separate_demo(mix_dir="./input")
     separated_files = glob(os.path.join('separated', "*.wav"))
-    separated_files = [f for f in separated_files if "original.wav" not in f]
-    outputs[
-    for file in sorted(separated_files):
-        # separated_audio = sio.wavfile.read(file)
-        # outputs['transcripts'].append(speech_recognition_pipeline(separated_audio[1])['text'])
-        outputs['transcripts'].append(whisper.transcribe(file)["text"])
-    return sorted(separated_files) + outputs['transcripts']
+    separated_files = sorted([f for f in separated_files if "original.wav" not in f])
+    outputs["transcripts"] = []
 
+    for i, f in enumerate(separated_files):
+        print(f"Transcribing separated audio {i+1} ...")
+        outputs["transcripts"].append(transcribe_audio(f))
+        print("Text:", outputs["transcripts"][-1])
+    return separated_files + outputs['transcripts']
+
 def set_example_audio(example: list) -> dict:
     return gr.Audio.update(value=example[0])
 
@@ -108,7 +93,7 @@ with demo:
         "samples/mixture2.wav",
         "samples/mixture3.wav"
     ]
-    example_selector = gr.inputs.
+    example_selector = gr.inputs.Radio(examples, label="Example Audio")
     button.click(separator, inputs=[input_audio, rec_audio, example_selector], outputs=outputs_audio + outputs_text)
 
 demo.launch()
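
For reference, here is a minimal standalone sketch of the transcription path this commit switches to (a transformers ASR pipeline with openai/whisper-base fed from soundfile). It is an illustration under stated assumptions, not part of the commit: the WAV path and the mono-downmix step are hypothetical additions for robustness.

# Minimal sketch: same Whisper pipeline setup as the updated app.py, run on one file.
# Assumes transformers, torch, and soundfile are installed; the WAV path is hypothetical.
import torch
import soundfile as sf
from transformers import pipeline

device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",
    device=0 if device == "cuda" else -1,
)

audio, sr = sf.read("samples/mixture1.wav", dtype="float32")  # hypothetical example path
if audio.ndim > 1:
    audio = audio.mean(axis=1)  # downmix to mono; the pipeline expects a 1-D signal

# Passing the raw array together with its sampling rate lets the pipeline
# resample to the 16 kHz the Whisper feature extractor expects.
print(pipe({"raw": audio, "sampling_rate": sr})["text"])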