Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,51 +1,15 @@
|
|
1 |
from transformers import pipeline
|
2 |
-
import torch
|
3 |
import gradio as gr
|
4 |
-
import librosa
|
5 |
-
import numpy as np
|
6 |
-
import time
|
7 |
|
8 |
# Speech-recognition pipeline built once at module level; transcribe() calls it
# for every request instead of constructing a new pipeline each time.
p = pipeline("automatic-speech-recognition", model="aware-ai/wav2vec2-base-german")
|
9 |
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
(get_speech_timestamps,
|
14 |
-
_, read_audio,
|
15 |
-
*_) = utils
|
16 |
-
|
17 |
-
def is_speech(wav, sr):
    """Tell whether any speech segment is reported for the given samples.

    Args:
        wav: Audio samples handed to ``get_speech_timestamps``.
        sr: Sampling rate of ``wav``, forwarded as ``sampling_rate``.

    Returns:
        True when ``get_speech_timestamps`` reports at least one segment,
        False otherwise.
    """
    # NOTE(review): `get_speech_timestamps` and `model` come from setup code
    # that is not visible here - presumably a voice-activity detector; confirm.
    segments = get_speech_timestamps(wav, model, sampling_rate=sr)
    return bool(len(segments))
|
22 |
-
|
23 |
-
def transcribe(audio, state=None):
    """Buffer incoming audio while speech is detected and transcribe on silence.

    Args:
        audio: Path of the latest audio chunk; it is loaded with librosa and
            resampled to 16 kHz.
        state: Session dict with the keys ``"text"`` (transcript so far),
            ``"temp_text"`` (text waiting to be committed) and ``"audio"``
            (buffered samples, or ``""`` while the buffer is empty).
            ``None`` starts a fresh session.

    Returns:
        A ``(display_text, state)`` tuple. ``state`` is the same dict that was
        passed in (or the freshly created one), updated in place.
    """
    # A dict literal as the default value would be created once and shared by
    # every call that omits `state`, leaking text and audio between sessions.
    if state is None:
        state = {"text": "", "temp_text": "", "audio": ""}

    wav_data, sample_rate = librosa.load(audio, sr=16000)

    # The empty buffer is stored as "". Test it by type and value: `is ""`
    # compares object identity with a literal, which is implementation
    # dependent, and `== ""` would be element-wise once the buffer is an array.
    buffered = state["audio"]
    buffer_is_empty = isinstance(buffered, str) and not buffered

    if is_speech(wav_data, sample_rate):
        # Speech is ongoing: start the buffer or append the new chunk to it.
        if buffer_is_empty:
            state["audio"] = wav_data
        else:
            state["audio"] = np.concatenate((buffered, wav_data))
    else:
        # Silence: transcribe whatever was buffered, commit it to the
        # transcript and reset the buffer for the next utterance.
        if not buffer_is_empty:
            state["temp_text"] = p(buffered)["text"] + "\n"
        state["text"] += state["temp_text"]
        state["temp_text"] = ""
        state["audio"] = ""

    # NOTE(review): fixed pause kept from the original; its purpose is not
    # evident from this block - presumably it throttles the live loop.
    time.sleep(0.5)
    return f'{state["text"]} ( {state["temp_text"]} )', state
|
44 |
|
45 |
gr.Interface(
|
46 |
transcribe,
|
47 |
-
[gr.Audio(source="microphone", type="filepath"
|
48 |
|
49 |
-
[gr.Textbox()
|
50 |
live=True
|
51 |
).launch(server_name = "0.0.0.0")
|
|
|
1 |
from transformers import pipeline
|
|
|
2 |
import gradio as gr
|
|
|
|
|
|
|
3 |
|
4 |
# Speech-recognition pipeline built once at module level; transcribe() calls it
# for every request instead of constructing a new pipeline each time.
p = pipeline("automatic-speech-recognition", model="aware-ai/wav2vec2-base-german")
|
5 |
|
6 |
+
def transcribe(audio):
    """Transcribe one recording with the module-level ASR pipeline.

    Args:
        audio: Value delivered by the audio input. The interface in this file
            configures it with ``type="filepath"``, so this is expected to be
            a path to the recording, or ``None`` when there is no recording.

    Returns:
        The ``"text"`` field of the pipeline result, or an empty string when
        no audio was supplied.
    """
    # NOTE(review): with live=True the callback is presumably re-run whenever
    # the input changes, including when the recording is cleared and the value
    # is None; return an empty transcript instead of failing in the pipeline.
    if audio is None:
        return ""

    # Process the recording in 10-second chunks with 4 s / 2 s of stride
    # passed as the (left, right) pair expected by the pipeline.
    result = p(audio, chunk_length_s=10, stride_length_s=(4, 2))
    return result["text"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
# Wire the microphone input to transcribe() and show the result in a textbox.
# live=True is passed so the interface runs without an explicit submit step;
# the server is bound to 0.0.0.0.
gr.Interface(
    fn=transcribe,
    inputs=[gr.Audio(source="microphone", type="filepath")],
    outputs=[gr.Textbox()],
    live=True,
).launch(server_name="0.0.0.0")
|