Merlintxu committed on
Commit
007d6a1
verified
1 Parent(s): 3b1a6b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -12
app.py CHANGED
@@ -10,11 +10,11 @@ from transformers import logging
10
  import math
11
  import json
12
 
13
- # Suppress warnings
14
  warnings.filterwarnings("ignore")
15
  logging.set_verbosity_error()
16
 
17
- # Updated models by language
18
  MODELS = {
19
  "es": [
20
  "openai/whisper-large-v3",
@@ -29,13 +29,13 @@ MODELS = {
29
  "pt": [
30
  "facebook/wav2vec2-large-xlsr-53-portuguese",
31
  "openai/whisper-medium",
32
- "jonatasgrosman/wav2vec2-large-xlsr-53-portuguese"
33
  ]
34
  }
35
 
36
  def convert_audio_to_wav(audio_path):
37
  if os.path.isdir(audio_path):
38
- raise ValueError(f"The path provided is a directory, not a file: {audio_path}")
39
  wav_path = "converted_audio.wav"
40
  command = ["ffmpeg", "-i", audio_path, "-ac", "1", "-ar", "16000", wav_path]
41
  subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
@@ -45,7 +45,7 @@ def detect_language(audio_path):
45
  try:
46
  speech, _ = librosa.load(audio_path, sr=16000, duration=30)
47
  except Exception as e:
48
- raise ValueError(f"Error loading audio file with librosa: {e}")
49
 
50
  processor = WhisperProcessor.from_pretrained("openai/whisper-base")
51
  model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")
@@ -75,7 +75,7 @@ def transcribe_audio_stream(audio, model_name):
75
  processor = WhisperProcessor.from_pretrained(model_name)
76
  model = WhisperForConditionalGeneration.from_pretrained(model_name)
77
 
78
- chunk_duration = 30 # seconds
79
 
80
  for i in range(0, int(duration), chunk_duration):
81
  end = min(i + chunk_duration, duration)
@@ -95,7 +95,7 @@ def transcribe_audio_stream(audio, model_name):
95
  else:
96
  transcriber = pipeline("automatic-speech-recognition", model=model_name)
97
 
98
- chunk_duration = 10 # seconds
99
 
100
  for i in range(0, int(duration), chunk_duration):
101
  end = min(i + chunk_duration, duration)
@@ -133,7 +133,8 @@ def combined_interface(audio, file_format):
133
  language, model_options = detect_and_select_model(audio)
134
  selected_model = model_options[0]
135
 
136
- yield language, model_options, selected_model, "", 0, "Initializing..."
 
137
 
138
  transcriptions = []
139
  for partial_transcriptions, progress in transcribe_audio_stream(audio, selected_model):
@@ -141,17 +142,20 @@ def combined_interface(audio, file_format):
141
  full_transcription = " ".join([t["text"] for t in transcriptions])
142
  progress_int = math.floor(progress)
143
  status = f"Transcribing... {progress_int}% complete"
144
- yield language, model_options, selected_model, full_transcription.strip(), progress_int, status
 
145
 
146
- # Save transcription file
147
  file_path = save_transcription(transcriptions, file_format)
148
 
149
- # Clean up temporary files
150
  os.remove("converted_audio.wav")
151
 
 
152
  yield language, model_options, selected_model, full_transcription.strip(), 100, f"Transcription complete! Download {file_path}", file_path
153
 
154
  except Exception as e:
 
155
  yield str(e), [], "", "An error occurred during processing.", 0, "Error", ""
156
 
157
  iface = gr.Interface(
@@ -175,4 +179,4 @@ iface = gr.Interface(
175
  )
176
 
177
  if __name__ == "__main__":
178
- iface.queue().launch()
 
10
  import math
11
  import json
12
 
13
+ # Suprimir advertencias
14
  warnings.filterwarnings("ignore")
15
  logging.set_verbosity_error()
16
 
17
+ # Modelos actualizados por idioma
18
  MODELS = {
19
  "es": [
20
  "openai/whisper-large-v3",
 
29
  "pt": [
30
  "facebook/wav2vec2-large-xlsr-53-portuguese",
31
  "openai/whisper-medium",
32
+ "jonatasgrosman/wav2vec2-xlsr-53-portuguese"
33
  ]
34
  }
35
 
36
  def convert_audio_to_wav(audio_path):
37
  if os.path.isdir(audio_path):
38
+ raise ValueError(f"La ruta proporcionada es un directorio, no un archivo: {audio_path}")
39
  wav_path = "converted_audio.wav"
40
  command = ["ffmpeg", "-i", audio_path, "-ac", "1", "-ar", "16000", wav_path]
41
  subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
 
45
  try:
46
  speech, _ = librosa.load(audio_path, sr=16000, duration=30)
47
  except Exception as e:
48
+ raise ValueError(f"Error al cargar el archivo de audio con librosa: {e}")
49
 
50
  processor = WhisperProcessor.from_pretrained("openai/whisper-base")
51
  model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")
 
75
  processor = WhisperProcessor.from_pretrained(model_name)
76
  model = WhisperForConditionalGeneration.from_pretrained(model_name)
77
 
78
+ chunk_duration = 30 # segundos
79
 
80
  for i in range(0, int(duration), chunk_duration):
81
  end = min(i + chunk_duration, duration)
 
95
  else:
96
  transcriber = pipeline("automatic-speech-recognition", model=model_name)
97
 
98
+ chunk_duration = 10 # segundos
99
 
100
  for i in range(0, int(duration), chunk_duration):
101
  end = min(i + chunk_duration, duration)
 
133
  language, model_options = detect_and_select_model(audio)
134
  selected_model = model_options[0]
135
 
136
+ # Primer yield: Añadir None para la séptima salida (Archivo de Descarga)
137
+ yield language, model_options, selected_model, "", 0, "Initializing...", None
138
 
139
  transcriptions = []
140
  for partial_transcriptions, progress in transcribe_audio_stream(audio, selected_model):
 
142
  full_transcription = " ".join([t["text"] for t in transcriptions])
143
  progress_int = math.floor(progress)
144
  status = f"Transcribing... {progress_int}% complete"
145
+ # Yield con None para el archivo de descarga hasta que esté completo
146
+ yield language, model_options, selected_model, full_transcription.strip(), progress_int, status, None
147
 
148
+ # Guardar transcripción
149
  file_path = save_transcription(transcriptions, file_format)
150
 
151
+ # Limpiar archivos temporales
152
  os.remove("converted_audio.wav")
153
 
154
+ # Yield final con el archivo de descarga
155
  yield language, model_options, selected_model, full_transcription.strip(), 100, f"Transcription complete! Download {file_path}", file_path
156
 
157
  except Exception as e:
158
+ # Asegurarse de que el yield de error también devuelva 7 valores
159
  yield str(e), [], "", "An error occurred during processing.", 0, "Error", ""
160
 
161
  iface = gr.Interface(
 
179
  )
180
 
181
  if __name__ == "__main__":
182
+ iface.queue().launch()