Update app.py
Browse files
app.py
CHANGED
@@ -10,11 +10,11 @@ from transformers import logging
|
|
10 |
import math
|
11 |
import json
|
12 |
|
13 |
-
#
|
14 |
warnings.filterwarnings("ignore")
|
15 |
logging.set_verbosity_error()
|
16 |
|
17 |
-
#
|
18 |
MODELS = {
|
19 |
"es": [
|
20 |
"openai/whisper-large-v3",
|
@@ -29,13 +29,13 @@ MODELS = {
|
|
29 |
"pt": [
|
30 |
"facebook/wav2vec2-large-xlsr-53-portuguese",
|
31 |
"openai/whisper-medium",
|
32 |
-
"jonatasgrosman/wav2vec2-
|
33 |
]
|
34 |
}
|
35 |
|
36 |
def convert_audio_to_wav(audio_path):
|
37 |
if os.path.isdir(audio_path):
|
38 |
-
raise ValueError(f"
|
39 |
wav_path = "converted_audio.wav"
|
40 |
command = ["ffmpeg", "-i", audio_path, "-ac", "1", "-ar", "16000", wav_path]
|
41 |
subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
|
@@ -45,7 +45,7 @@ def detect_language(audio_path):
|
|
45 |
try:
|
46 |
speech, _ = librosa.load(audio_path, sr=16000, duration=30)
|
47 |
except Exception as e:
|
48 |
-
raise ValueError(f"Error
|
49 |
|
50 |
processor = WhisperProcessor.from_pretrained("openai/whisper-base")
|
51 |
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")
|
@@ -75,7 +75,7 @@ def transcribe_audio_stream(audio, model_name):
|
|
75 |
processor = WhisperProcessor.from_pretrained(model_name)
|
76 |
model = WhisperForConditionalGeneration.from_pretrained(model_name)
|
77 |
|
78 |
-
chunk_duration = 30 #
|
79 |
|
80 |
for i in range(0, int(duration), chunk_duration):
|
81 |
end = min(i + chunk_duration, duration)
|
@@ -95,7 +95,7 @@ def transcribe_audio_stream(audio, model_name):
|
|
95 |
else:
|
96 |
transcriber = pipeline("automatic-speech-recognition", model=model_name)
|
97 |
|
98 |
-
chunk_duration = 10 #
|
99 |
|
100 |
for i in range(0, int(duration), chunk_duration):
|
101 |
end = min(i + chunk_duration, duration)
|
@@ -133,7 +133,8 @@ def combined_interface(audio, file_format):
|
|
133 |
language, model_options = detect_and_select_model(audio)
|
134 |
selected_model = model_options[0]
|
135 |
|
136 |
-
yield
|
|
|
137 |
|
138 |
transcriptions = []
|
139 |
for partial_transcriptions, progress in transcribe_audio_stream(audio, selected_model):
|
@@ -141,17 +142,20 @@ def combined_interface(audio, file_format):
|
|
141 |
full_transcription = " ".join([t["text"] for t in transcriptions])
|
142 |
progress_int = math.floor(progress)
|
143 |
status = f"Transcribing... {progress_int}% complete"
|
144 |
-
|
|
|
145 |
|
146 |
-
#
|
147 |
file_path = save_transcription(transcriptions, file_format)
|
148 |
|
149 |
-
#
|
150 |
os.remove("converted_audio.wav")
|
151 |
|
|
|
152 |
yield language, model_options, selected_model, full_transcription.strip(), 100, f"Transcription complete! Download {file_path}", file_path
|
153 |
|
154 |
except Exception as e:
|
|
|
155 |
yield str(e), [], "", "An error occurred during processing.", 0, "Error", ""
|
156 |
|
157 |
iface = gr.Interface(
|
@@ -175,4 +179,4 @@ iface = gr.Interface(
|
|
175 |
)
|
176 |
|
177 |
if __name__ == "__main__":
|
178 |
-
iface.queue().launch()
|
|
|
10 |
import math
|
11 |
import json
|
12 |
|
13 |
+
# Suprimir advertencias
|
14 |
warnings.filterwarnings("ignore")
|
15 |
logging.set_verbosity_error()
|
16 |
|
17 |
+
# Modelos actualizados por idioma
|
18 |
MODELS = {
|
19 |
"es": [
|
20 |
"openai/whisper-large-v3",
|
|
|
29 |
"pt": [
|
30 |
"facebook/wav2vec2-large-xlsr-53-portuguese",
|
31 |
"openai/whisper-medium",
|
32 |
+
"jonatasgrosman/wav2vec2-xlsr-53-portuguese"
|
33 |
]
|
34 |
}
|
35 |
|
36 |
def convert_audio_to_wav(audio_path):
|
37 |
if os.path.isdir(audio_path):
|
38 |
+
raise ValueError(f"La ruta proporcionada es un directorio, no un archivo: {audio_path}")
|
39 |
wav_path = "converted_audio.wav"
|
40 |
command = ["ffmpeg", "-i", audio_path, "-ac", "1", "-ar", "16000", wav_path]
|
41 |
subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
|
|
|
45 |
try:
|
46 |
speech, _ = librosa.load(audio_path, sr=16000, duration=30)
|
47 |
except Exception as e:
|
48 |
+
raise ValueError(f"Error al cargar el archivo de audio con librosa: {e}")
|
49 |
|
50 |
processor = WhisperProcessor.from_pretrained("openai/whisper-base")
|
51 |
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")
|
|
|
75 |
processor = WhisperProcessor.from_pretrained(model_name)
|
76 |
model = WhisperForConditionalGeneration.from_pretrained(model_name)
|
77 |
|
78 |
+
chunk_duration = 30 # segundos
|
79 |
|
80 |
for i in range(0, int(duration), chunk_duration):
|
81 |
end = min(i + chunk_duration, duration)
|
|
|
95 |
else:
|
96 |
transcriber = pipeline("automatic-speech-recognition", model=model_name)
|
97 |
|
98 |
+
chunk_duration = 10 # segundos
|
99 |
|
100 |
for i in range(0, int(duration), chunk_duration):
|
101 |
end = min(i + chunk_duration, duration)
|
|
|
133 |
language, model_options = detect_and_select_model(audio)
|
134 |
selected_model = model_options[0]
|
135 |
|
136 |
+
# Primer yield: Añadir None para la séptima salida (Archivo de Descarga)
|
137 |
+
yield language, model_options, selected_model, "", 0, "Initializing...", None
|
138 |
|
139 |
transcriptions = []
|
140 |
for partial_transcriptions, progress in transcribe_audio_stream(audio, selected_model):
|
|
|
142 |
full_transcription = " ".join([t["text"] for t in transcriptions])
|
143 |
progress_int = math.floor(progress)
|
144 |
status = f"Transcribing... {progress_int}% complete"
|
145 |
+
# Yield con None para el archivo de descarga hasta que esté completo
|
146 |
+
yield language, model_options, selected_model, full_transcription.strip(), progress_int, status, None
|
147 |
|
148 |
+
# Guardar transcripción
|
149 |
file_path = save_transcription(transcriptions, file_format)
|
150 |
|
151 |
+
# Limpiar archivos temporales
|
152 |
os.remove("converted_audio.wav")
|
153 |
|
154 |
+
# Yield final con el archivo de descarga
|
155 |
yield language, model_options, selected_model, full_transcription.strip(), 100, f"Transcription complete! Download {file_path}", file_path
|
156 |
|
157 |
except Exception as e:
|
158 |
+
# Asegurarse de que el yield de error también devuelva 7 valores
|
159 |
yield str(e), [], "", "An error occurred during processing.", 0, "Error", ""
|
160 |
|
161 |
iface = gr.Interface(
|
|
|
179 |
)
|
180 |
|
181 |
if __name__ == "__main__":
|
182 |
+
iface.queue().launch()
|