Spaces:
Build error
Build error
Commit
·
c220da3
1
Parent(s):
24fab16
try un-batched
Browse files
app.py
CHANGED
@@ -24,6 +24,7 @@ language_names = sorted(TO_LANGUAGE_CODE.keys())
|
|
24 |
CHUNK_LENGTH_S = 30
|
25 |
BATCH_SIZE = 16
|
26 |
NUM_PROC = 8
|
|
|
27 |
FILE_LIMIT_MB = 1000
|
28 |
|
29 |
|
@@ -70,10 +71,7 @@ def forward(batch, task=None, return_timestamps=False):
|
|
70 |
|
71 |
|
72 |
if __name__ == "__main__":
|
73 |
-
|
74 |
-
pool = Pool(NUM_PROC)
|
75 |
-
|
76 |
-
def transcribe_chunked_audio(microphone, file_upload, task, return_timestamps):
|
77 |
warn_output = ""
|
78 |
if (microphone is not None) and (file_upload is not None):
|
79 |
warn_output = (
|
@@ -82,31 +80,19 @@ if __name__ == "__main__":
|
|
82 |
)
|
83 |
|
84 |
elif (microphone is None) and (file_upload is None):
|
85 |
-
return "ERROR: You have to either use the microphone or upload an audio file"
|
86 |
|
87 |
inputs = microphone if microphone is not None else file_upload
|
88 |
|
89 |
-
file_size_mb = os.stat(inputs).st_size / (1024 * 1024)
|
90 |
-
if file_size_mb > FILE_LIMIT_MB:
|
91 |
-
return f"ERROR: File size exceeds file size limit. Got file of size {file_size_mb:.2f}MB for a limit of {FILE_LIMIT_MB}MB.", None
|
92 |
-
|
93 |
with open(inputs, "rb") as f:
|
94 |
inputs = f.read()
|
95 |
|
96 |
-
inputs = ffmpeg_read(inputs,
|
97 |
-
inputs = {"array": inputs, "sampling_rate":
|
98 |
-
|
99 |
-
dataloader = processor.preprocess_batch(inputs, chunk_length_s=CHUNK_LENGTH_S, batch_size=BATCH_SIZE)
|
100 |
|
101 |
-
|
102 |
-
model_outputs = pool.map(partial(forward, task=task, return_timestamps=return_timestamps), dataloader)
|
103 |
-
except ValueError as err:
|
104 |
-
# pre-processor does all the necessary compatibility checks for our audio inputs
|
105 |
-
return err, None
|
106 |
|
107 |
-
|
108 |
-
timestamps = post_processed.get("chunks")
|
109 |
-
return warn_output + post_processed["text"], timestamps
|
110 |
|
111 |
def _return_yt_html_embed(yt_url):
|
112 |
video_id = yt_url.split("?v=")[-1]
|
@@ -124,7 +110,7 @@ if __name__ == "__main__":
|
|
124 |
return html_embed_str, text, timestamps
|
125 |
|
126 |
audio_chunked = gr.Interface(
|
127 |
-
fn=
|
128 |
inputs=[
|
129 |
gr.inputs.Audio(source="microphone", optional=True, type="filepath"),
|
130 |
gr.inputs.Audio(source="upload", optional=True, type="filepath"),
|
|
|
24 |
CHUNK_LENGTH_S = 30
|
25 |
BATCH_SIZE = 16
|
26 |
NUM_PROC = 8
|
27 |
+
SAMPLING_RATE = 16000
|
28 |
FILE_LIMIT_MB = 1000
|
29 |
|
30 |
|
|
|
71 |
|
72 |
|
73 |
if __name__ == "__main__":
|
74 |
+
def transcribe_audio(microphone, file_upload, task, return_timestamps):
|
|
|
|
|
|
|
75 |
warn_output = ""
|
76 |
if (microphone is not None) and (file_upload is not None):
|
77 |
warn_output = (
|
|
|
80 |
)
|
81 |
|
82 |
elif (microphone is None) and (file_upload is None):
|
83 |
+
return "ERROR: You have to either use the microphone or upload an audio file"
|
84 |
|
85 |
inputs = microphone if microphone is not None else file_upload
|
86 |
|
|
|
|
|
|
|
|
|
87 |
with open(inputs, "rb") as f:
|
88 |
inputs = f.read()
|
89 |
|
90 |
+
inputs = ffmpeg_read(inputs, SAMPLING_RATE)
|
91 |
+
inputs = {"array": base64.b64encode(inputs.tobytes()).decode(), "sampling_rate": SAMPLING_RATE}
|
|
|
|
|
92 |
|
93 |
+
text, timestamps = inference(inputs=inputs, task=task, return_timestamps=return_timestamps)
|
|
|
|
|
|
|
|
|
94 |
|
95 |
+
return warn_output + text, timestamps
|
|
|
|
|
96 |
|
97 |
def _return_yt_html_embed(yt_url):
|
98 |
video_id = yt_url.split("?v=")[-1]
|
|
|
110 |
return html_embed_str, text, timestamps
|
111 |
|
112 |
audio_chunked = gr.Interface(
|
113 |
+
fn=transcribe_audio,
|
114 |
inputs=[
|
115 |
gr.inputs.Audio(source="microphone", optional=True, type="filepath"),
|
116 |
gr.inputs.Audio(source="upload", optional=True, type="filepath"),
|