Spaces:
Sleeping
Sleeping
Commit
·
45496e8
1
Parent(s):
8ecd0fd
up
Browse files
- README.md +1 -1
- run_demo_ct2.py +23 -26
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: 🤫
|
|
4 |
colorFrom: indigo
|
5 |
colorTo: red
|
6 |
sdk: gradio
|
7 |
-
sdk_version:
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
tags:
|
|
|
4 |
colorFrom: indigo
|
5 |
colorTo: red
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 4.16.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
tags:
|
run_demo_ct2.py
CHANGED
@@ -42,6 +42,8 @@ GEN_KWARGS = {
|
|
42 |
# "logprob_threshold": None,
|
43 |
# vad threshold
|
44 |
# "no_speech_threshold": None,
|
|
|
|
|
45 |
}
|
46 |
|
47 |
logging.basicConfig(
|
@@ -156,23 +158,24 @@ def infer(model, filename, with_timestamps, return_df=False):
|
|
156 |
return text
|
157 |
|
158 |
|
159 |
-
def transcribe(microphone, file_upload, with_timestamps, model_name=DEFAULT_MODEL_NAME):
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
|
|
166 |
|
167 |
-
elif (microphone is None) and (file_upload is None):
|
168 |
-
|
169 |
|
170 |
-
|
171 |
|
172 |
model = maybe_load_cached_pipeline(model_name)
|
173 |
-
# text = model.transcribe(
|
174 |
-
# text = infer(model,
|
175 |
-
text = infer(model,
|
176 |
|
177 |
logger.info(f'Transcription by `{model_name}`:\n{text.to_json(orient="index", force_ascii=False, indent=2)}\n')
|
178 |
|
@@ -233,8 +236,9 @@ with gr.Blocks() as demo:
|
|
233 |
"""
|
234 |
)
|
235 |
|
236 |
-
microphone_input = gr.
|
237 |
-
upload_input = gr.
|
|
|
238 |
with_timestamps_input = gr.Checkbox(label="With timestamps?")
|
239 |
|
240 |
microphone_transcribe_btn = gr.Button("Transcribe Audio")
|
@@ -247,14 +251,11 @@ with gr.Blocks() as demo:
|
|
247 |
text_output_df2 = gr.DataFrame(
|
248 |
value=default_text_output_df,
|
249 |
label="Transcription",
|
250 |
-
row_count=(0, "dynamic"),
|
251 |
-
max_rows=10,
|
252 |
wrap=True,
|
253 |
-
overflow_row_behaviour="paginate",
|
254 |
)
|
255 |
|
256 |
microphone_transcribe_btn.click(
|
257 |
-
transcribe, inputs=[
|
258 |
)
|
259 |
|
260 |
# with gr.Tab("Transcribe YouTube"):
|
@@ -301,7 +302,7 @@ with gr.Blocks() as demo:
|
|
301 |
"""
|
302 |
)
|
303 |
|
304 |
-
yt_link_input = gr.
|
305 |
download_youtube_btn = gr.Button("Download Youtube video")
|
306 |
downloaded_video_output = gr.Video(label="Video file", mirror_webcam=False)
|
307 |
download_youtube_btn.click(download_video_from_youtube, inputs=[yt_link_input], outputs=[downloaded_video_output])
|
@@ -311,14 +312,10 @@ with gr.Blocks() as demo:
|
|
311 |
text_output_df = gr.DataFrame(
|
312 |
value=default_text_output_df,
|
313 |
label="Transcription",
|
314 |
-
row_count=(0, "dynamic"),
|
315 |
-
max_rows=10,
|
316 |
wrap=True,
|
317 |
-
overflow_row_behaviour="paginate",
|
318 |
)
|
319 |
|
320 |
video_transcribe_btn.click(video_transcribe, inputs=[downloaded_video_output, with_timestamps_input3], outputs=[text_output_df])
|
321 |
|
322 |
-
# demo.launch(server_name="0.0.0.0", debug=True)
|
323 |
-
|
324 |
-
demo.launch(enable_queue=True)
|
|
|
42 |
# "logprob_threshold": None,
|
43 |
# vad threshold
|
44 |
# "no_speech_threshold": None,
|
45 |
+
# "condition_on_previous_text": False, # todo: only for distilled version
|
46 |
+
"vad_filter": True,
|
47 |
}
|
48 |
|
49 |
logging.basicConfig(
|
|
|
158 |
return text
|
159 |
|
160 |
|
161 |
+
# def transcribe(microphone, file_upload, with_timestamps, model_name=DEFAULT_MODEL_NAME):
|
162 |
+
def transcribe(audio_file_path, with_timestamps, model_name=DEFAULT_MODEL_NAME):
|
163 |
+
# warn_output = ""
|
164 |
+
# if (microphone is not None) and (file_upload is not None):
|
165 |
+
# warn_output = (
|
166 |
+
# "WARNING: You've uploaded an audio file and used the microphone. "
|
167 |
+
# "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
|
168 |
+
# )
|
169 |
|
170 |
+
# elif (microphone is None) and (file_upload is None):
|
171 |
+
# return "ERROR: You have to either use the microphone or upload an audio file"
|
172 |
|
173 |
+
# audio_file_path = microphone if microphone is not None else file_upload
|
174 |
|
175 |
model = maybe_load_cached_pipeline(model_name)
|
176 |
+
# text = model.transcribe(audio_file_path, **GEN_KWARGS)["text"]
|
177 |
+
# text = infer(model, audio_file_path, with_timestamps)
|
178 |
+
text = infer(model, audio_file_path, with_timestamps, return_df=True)
|
179 |
|
180 |
logger.info(f'Transcription by `{model_name}`:\n{text.to_json(orient="index", force_ascii=False, indent=2)}\n')
|
181 |
|
|
|
236 |
"""
|
237 |
)
|
238 |
|
239 |
+
# microphone_input = gr.Audio(sources="microphone", type="filepath", label="Record")
|
240 |
+
# upload_input = gr.Audio(sources="upload", type="filepath", label="Upload File")
|
241 |
+
audio_file_path = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Record or upload file")
|
242 |
with_timestamps_input = gr.Checkbox(label="With timestamps?")
|
243 |
|
244 |
microphone_transcribe_btn = gr.Button("Transcribe Audio")
|
|
|
251 |
text_output_df2 = gr.DataFrame(
|
252 |
value=default_text_output_df,
|
253 |
label="Transcription",
|
|
|
|
|
254 |
wrap=True,
|
|
|
255 |
)
|
256 |
|
257 |
microphone_transcribe_btn.click(
|
258 |
+
transcribe, inputs=[audio_file_path, with_timestamps_input], outputs=text_output_df2
|
259 |
)
|
260 |
|
261 |
# with gr.Tab("Transcribe YouTube"):
|
|
|
302 |
"""
|
303 |
)
|
304 |
|
305 |
+
yt_link_input = gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL")
|
306 |
download_youtube_btn = gr.Button("Download Youtube video")
|
307 |
downloaded_video_output = gr.Video(label="Video file", mirror_webcam=False)
|
308 |
download_youtube_btn.click(download_video_from_youtube, inputs=[yt_link_input], outputs=[downloaded_video_output])
|
|
|
312 |
text_output_df = gr.DataFrame(
|
313 |
value=default_text_output_df,
|
314 |
label="Transcription",
|
|
|
|
|
315 |
wrap=True,
|
|
|
316 |
)
|
317 |
|
318 |
video_transcribe_btn.click(video_transcribe, inputs=[downloaded_video_output, with_timestamps_input3], outputs=[text_output_df])
|
319 |
|
320 |
+
# demo.queue(max_size=10).launch(server_name="0.0.0.0", debug=True, ssl_certfile="/home/bhuang/tools/cert.pem", ssl_keyfile="/home/bhuang/tools/key.pem", ssl_verify=False)
|
321 |
+
demo.queue(max_size=10).launch()
|
|