Spaces:
Sleeping
Sleeping
Commit
·
3c78a64
1
Parent(s):
bcd9622
Update app.py
Browse files
app.py
CHANGED
@@ -6,8 +6,8 @@ import psutil
|
|
6 |
import time
|
7 |
import whisperx
|
8 |
|
9 |
-
|
10 |
-
model = whisper.load_model('large-v2')
|
11 |
|
12 |
def speech_to_text(mic=None, file=None, lang=None, task='transcribe'):
|
13 |
if mic is not None:
|
@@ -19,13 +19,13 @@ def speech_to_text(mic=None, file=None, lang=None, task='transcribe'):
|
|
19 |
print(lang, task)
|
20 |
|
21 |
time_start = time.time()
|
22 |
-
|
23 |
-
results = model.transcribe(audio, task=task, language=lang, beam_size=5)
|
24 |
#print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
|
25 |
|
26 |
# Decode audio to Text
|
27 |
-
|
28 |
-
objects = results["segments"]
|
29 |
print(objects)
|
30 |
time_end = time.time()
|
31 |
time_diff = time_end - time_start
|
@@ -67,17 +67,16 @@ with gr.Blocks(title='Whisper Demo', theme=theme) as demo:
|
|
67 |
''')
|
68 |
audio_in = gr.Audio(label="Record", source='microphone', type="filepath")
|
69 |
file_in = gr.Audio(label="Upload", source='upload', type="filepath")
|
70 |
-
drop_down = gr.Dropdown(["de", "en", "es", "fr", "ru", None], value=None)
|
71 |
transcribe_btn = gr.Button("Transcribe audio", variant="primary")
|
72 |
-
translate_btn = gr.Button("Translate to English")
|
73 |
trans_df = gr.DataFrame(label="Transcription dataframe", row_count=(0, "dynamic"), max_rows = 10, wrap=True, overflow_row_behaviour='paginate')
|
74 |
sys_info = gr.Markdown("")
|
75 |
-
transcribe_btn.click(lambda x, y: speech_to_text(x, y,
|
76 |
[audio_in, file_in],
|
77 |
[trans_df, sys_info]
|
78 |
)
|
79 |
-
translate_btn.click(lambda x, y, z: speech_to_text(x, y,
|
80 |
-
[audio_in, file_in],
|
81 |
[trans_df, sys_info])
|
82 |
|
83 |
demo.launch()
|
|
|
6 |
import time
|
7 |
import whisperx
|
8 |
|
9 |
+
model = WhisperModel('large-v2', device="cuda", compute_type="float16")
|
10 |
+
#model = whisper.load_model('large-v2')
|
11 |
|
12 |
def speech_to_text(mic=None, file=None, lang=None, task='transcribe'):
|
13 |
if mic is not None:
|
|
|
19 |
print(lang, task)
|
20 |
|
21 |
time_start = time.time()
|
22 |
+
segments, info = model.transcribe(audio, task=task, language=lang, beam_size=5)
|
23 |
+
#results = model.transcribe(audio, task=task, language=lang, beam_size=5)
|
24 |
#print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
|
25 |
|
26 |
# Decode audio to Text
|
27 |
+
objects = [s._asdict() for s in segments]
|
28 |
+
#objects = results["segments"]
|
29 |
print(objects)
|
30 |
time_end = time.time()
|
31 |
time_diff = time_end - time_start
|
|
|
67 |
''')
|
68 |
audio_in = gr.Audio(label="Record", source='microphone', type="filepath")
|
69 |
file_in = gr.Audio(label="Upload", source='upload', type="filepath")
|
|
|
70 |
transcribe_btn = gr.Button("Transcribe audio", variant="primary")
|
71 |
+
translate_btn = gr.Button("Translate to English")
|
72 |
trans_df = gr.DataFrame(label="Transcription dataframe", row_count=(0, "dynamic"), max_rows = 10, wrap=True, overflow_row_behaviour='paginate')
|
73 |
sys_info = gr.Markdown("")
|
74 |
+
transcribe_btn.click(lambda x, y: speech_to_text(x, y, task='transcribe'),
|
75 |
[audio_in, file_in],
|
76 |
[trans_df, sys_info]
|
77 |
)
|
78 |
+
translate_btn.click(lambda x, y, z: speech_to_text(x, y, task='translate'),
|
79 |
+
[audio_in, file_in],
|
80 |
[trans_df, sys_info])
|
81 |
|
82 |
demo.launch()
|