imansarraf committed on
Commit
467cc5c
·
verified ·
1 Parent(s): 8dfefc8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -5
app.py CHANGED
@@ -11,10 +11,10 @@ css = """
11
 
12
  seg = Segmenter(ffmpeg_path="ffmpeg",model_path="keras_speech_music_noise_cnn.hdf5" , device="cpu",vad_type="vad")
13
 
14
- recognizer = SpeechRecognizer(language="fa", rate=16000,api_key=GOOGLE_SPEECH_API_KEY, proxies=None)
15
 
16
  def process_segment(args):
17
- segment, wav = args
18
  start, stop = segment
19
  # pp = converter((start, stop))
20
  pp = pcm_to_flac(wav[int(start*16000) : int(stop*16000)])
@@ -28,7 +28,8 @@ def pcm_to_flac(pcm_data, sample_rate=16000):
28
  return flac_data
29
 
30
 
31
- def transcribe_audio(audio_file):
 
32
  text=""
33
  isig,wav = seg(audio_file)
34
  isig = filter_output(isig , max_silence=0.5 ,ignore_small_speech_segments=0.1 , max_speech_len=15 ,split_speech_bigger_than=20)
@@ -36,7 +37,7 @@ def transcribe_audio(audio_file):
36
  print(isig)
37
  results=[]
38
  for segment in isig:
39
- results.append (process_segment((segment, wav)))
40
  for start, stop, tr_beamsearch_lm in results:
41
 
42
  try:
@@ -53,7 +54,10 @@ def transcribe_audio(audio_file):
53
  # Define the Gradio interface
54
  interface = gr.Interface(
55
  fn=transcribe_audio,
56
- inputs=gr.Audio(type="filepath"), # Removed 'source="microphone"'
 
 
 
57
  outputs=gr.Textbox(label="Transcription", elem_id="output-text",interactive=True),
58
  title="Persian Audio Transcription",
59
  description="Upload an audio file or record audio to get the transcription.",
 
11
 
12
  seg = Segmenter(ffmpeg_path="ffmpeg",model_path="keras_speech_music_noise_cnn.hdf5" , device="cpu",vad_type="vad")
13
 
14
+
15
 
16
  def process_segment(args):
17
+ segment, wav,recognizer = args
18
  start, stop = segment
19
  # pp = converter((start, stop))
20
  pp = pcm_to_flac(wav[int(start*16000) : int(stop*16000)])
 
28
  return flac_data
29
 
30
 
31
+ def transcribe_audio(audio_file,lan):
32
+ recognizer = SpeechRecognizer(language=lan, rate=16000,api_key=GOOGLE_SPEECH_API_KEY, proxies=None)
33
  text=""
34
  isig,wav = seg(audio_file)
35
  isig = filter_output(isig , max_silence=0.5 ,ignore_small_speech_segments=0.1 , max_speech_len=15 ,split_speech_bigger_than=20)
 
37
  print(isig)
38
  results=[]
39
  for segment in isig:
40
+ results.append (process_segment((segment, wav,recognizer)))
41
  for start, stop, tr_beamsearch_lm in results:
42
 
43
  try:
 
54
  # Define the Gradio interface
55
  interface = gr.Interface(
56
  fn=transcribe_audio,
57
+ inputs=[
58
+ gr.Audio(type="filepath"),
59
+ gr.Radio(choices=["fa", "en", "ar"], label="Language")
60
+ ],
61
  outputs=gr.Textbox(label="Transcription", elem_id="output-text",interactive=True),
62
  title="Persian Audio Transcription",
63
  description="Upload an audio file or record audio to get the transcription.",