alakxender committed on
Commit
700fda9
·
1 Parent(s): dee3b49
Files changed (1) hide show
  1. app.py +26 -7
app.py CHANGED
@@ -63,14 +63,33 @@ def transcribe(audio_file):
63
  return f"Error during transcription: {str(e)}"
64
 
65
  # Create Gradio interface
66
- iface = gr.Interface(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  fn=transcribe,
68
- inputs=gr.Audio(type="filepath"),
69
- outputs="text",
70
- title="Dhivehi Speech Recognition with Language Model",
71
- description="Upload an audio file to transcribe Dhivehi speech to text using language model enhanced decoding."
 
 
72
  )
73
 
 
 
 
74
 
75
  def install_requirements():
76
  requirements_path = 'requirements.txt'
@@ -116,7 +135,7 @@ if __name__ == "__main__":
116
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
117
  torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
118
 
119
- MODEL_NAME = "alakxender/wav2vec2-large-mms-1b-dv-syn-md" # Trained on 100% Synthetic Data (130-150 Hours)
120
  # MODEL_NAME = "alakxender/wav2vec2-large-mms-1b-cv" # Trained on Common Voice Data (Unknown Hours)
121
  # MODEL_NAME = "alakxender/whisper-small-dv-syn-md" # Trained on 100% Synthetic Data (150 Hours)
122
  # MODEL_NAME = "alakxender/whisper-small-cv" # Trained on Common Voice Data (Unknown Hours)
@@ -142,6 +161,6 @@ if __name__ == "__main__":
142
  MAX_LENGTH = 120 # 2 minutes
143
  MIN_LENGTH = 1 # 1 second
144
 
145
- iface.launch()
146
  else:
147
  print("Failed to install some requirements")
 
63
  return f"Error during transcription: {str(e)}"
64
 
65
  # Create Gradio interface
66
+
67
+ css = """
68
+ .textbox1 textarea {
69
+ font-size: 18px !important;
70
+ font-family: 'MV_Faseyha', 'Faruma', 'A_Faruma' !important;
71
+ line-height: 1.8 !important;
72
+ }
73
+ .textbox2 textarea {
74
+ display: none;
75
+ }
76
+ """
77
+
78
+ demo = gr.Blocks(css=css)
79
+
80
+ tab_audio = gr.Interface(
81
  fn=transcribe,
82
+ inputs=[
83
+ gr.Audio(sources=["upload","microphone"], type="filepath", label="Audio"),
84
+ ],
85
+ outputs=gr.Textbox(label="Transcription", rtl=True, elem_classes="textbox1"),
86
+ title="Transcribe Dhivehi Audio",
87
+ allow_flagging="never",
88
  )
89
 
90
+ with demo:
91
+ gr.TabbedInterface([tab_audio], ["Audio"])
92
+
93
 
94
  def install_requirements():
95
  requirements_path = 'requirements.txt'
 
135
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
136
  torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
137
 
138
+ MODEL_NAME = "alakxender/wav2vec2-large-mms-1b-dv-syn-md" # Trained on 100% Synthetic Data (150 Hours)
139
  # MODEL_NAME = "alakxender/wav2vec2-large-mms-1b-cv" # Trained on Common Voice Data (Unknown Hours)
140
  # MODEL_NAME = "alakxender/whisper-small-dv-syn-md" # Trained on 100% Synthetic Data (150 Hours)
141
  # MODEL_NAME = "alakxender/whisper-small-cv" # Trained on Common Voice Data (Unknown Hours)
 
161
  MAX_LENGTH = 120 # 2 minutes
162
  MIN_LENGTH = 1 # 1 second
163
 
164
+ demo.launch()
165
  else:
166
  print("Failed to install some requirements")