Sangmin committed on
Commit
c5012b6
1 Parent(s): cb5005d

Add an option to choose language

Browse files

Let users specify the language of the audio/video content.

Files changed (1) hide show
  1. app.py +8 -3
app.py CHANGED
@@ -83,7 +83,7 @@ def download_yt_audio(yt_url, filename):
83
  raise gr.Error(str(err))
84
 
85
 
86
- def yt_transcribe(yt_url, task, return_timestamps, max_filesize=75.0):
87
  html_embed_str = _return_yt_html_embed(yt_url)
88
 
89
  with tempfile.TemporaryDirectory() as tmpdirname:
@@ -94,8 +94,12 @@ def yt_transcribe(yt_url, task, return_timestamps, max_filesize=75.0):
94
 
95
  inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
96
  inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
97
-
98
- result = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=return_timestamps)
 
 
 
 
99
 
100
  if return_timestamps:
101
  return html_embed_str, chunks_to_srt(result['chunks'])
@@ -111,6 +115,7 @@ mf_transcribe = gr.Interface(
111
  gr.inputs.Audio(source="microphone", type="filepath", optional=True),
112
  gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
113
  gr.inputs.Checkbox(label="Return timestamps"),
 
114
  ],
115
  outputs="text",
116
  layout="horizontal",
 
83
  raise gr.Error(str(err))
84
 
85
 
86
+ def yt_transcribe(yt_url, task, return_timestamps, language, max_filesize=75.0):
87
  html_embed_str = _return_yt_html_embed(yt_url)
88
 
89
  with tempfile.TemporaryDirectory() as tmpdirname:
 
94
 
95
  inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
96
  inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
97
+
98
+ # Map the language names to their corresponding codes
99
+ language_codes = {"English": "en", "Korean": "ko", "Japanese": "ja"}
100
+ language_code = language_codes.get(language, "en") # Default to "en" if the language is not found
101
+
102
+ result = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task, "language": f"<|{language_code}|>"}, return_timestamps=return_timestamps)
103
 
104
  if return_timestamps:
105
  return html_embed_str, chunks_to_srt(result['chunks'])
 
115
  gr.inputs.Audio(source="microphone", type="filepath", optional=True),
116
  gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
117
  gr.inputs.Checkbox(label="Return timestamps"),
118
+ gr.inputs.Dropdown(choices=["English", "Korean", "Japanese"], label="Language"),
119
  ],
120
  outputs="text",
121
  layout="horizontal",