Irpan committed on
Commit
71494c3
·
1 Parent(s): 6502e85
Files changed (2) hide show
  1. app.py +5 -1
  2. asr.py +7 -2
app.py CHANGED
@@ -33,7 +33,11 @@ mms_transcribe = gr.Interface(
33
  mms_synthesize = gr.Interface(
34
  fn=tts.synthesize,
35
  inputs=[
36
- gr.Text(label="Input text"),
 
 
 
 
37
  gr.Dropdown(
38
  choices=[model for model in tts.models_info],
39
  label="Select a Model",
 
33
  mms_synthesize = gr.Interface(
34
  fn=tts.synthesize,
35
  inputs=[
36
+ gr.Text(
37
+ label="Input text",
38
+ max_length=200, # Limit input to 200 characters
39
+ interactive=True,
40
+ ),
41
  gr.Dropdown(
42
  choices=[model for model in tts.models_info],
43
  label="Select a Model",
asr.py CHANGED
@@ -67,13 +67,18 @@ def transcribe(audio_data, model_id) -> str:
67
  # file upload
68
  audio_input, sampling_rate = torchaudio.load(audio_data)
69
  else:
70
- return "<<ERROR: Invalid Audio Input Instance: {}>>".format(type(audio_data))
71
 
 
 
 
 
 
 
72
  model = models_info[model_id]["model"]
73
  processor = models_info[model_id]["processor"]
74
  target_sr = processor.feature_extractor.sampling_rate
75
  ctc_model = models_info[model_id]["ctc_model"]
76
- print(target_sr)
77
 
78
  # Resample if needed
79
  if sampling_rate != target_sr:
 
67
  # file upload
68
  audio_input, sampling_rate = torchaudio.load(audio_data)
69
  else:
70
+ return "<<ERROR: Invalid Audio Input Instance: {}>>".format(type(audio_data)), None
71
 
72
+ print(audio_input.shape)
73
+ # Check audio duration
74
+ duration = audio_input.shape[1] / sampling_rate
75
+ if duration > 10:
76
+ return "<<ERROR: Audio duration exceeds 10 seconds. Please upload a shorter audio clip for faster processing.>>", None
77
+
78
  model = models_info[model_id]["model"]
79
  processor = models_info[model_id]["processor"]
80
  target_sr = processor.feature_extractor.sampling_rate
81
  ctc_model = models_info[model_id]["ctc_model"]
 
82
 
83
  # Resample if needed
84
  if sampling_rate != target_sr: