Spaces:

Kr08
/

ASR_gradio

Build error

Kr08 committed on Sep 2, 2024

Commit

81e4ee2

verified ·

1 Parent(s): a77426c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,36 +1,24 @@
 import gradio as gr
-from audio_processing import process_audio
-import spaces
-@spaces.GPU
-def gradio_process_audio(audio):
-    try:
-        if audio is None:
-            return "No file uploaded", "", ""
-        # The Gradio Audio input with type="numpy" provides a tuple of (sample_rate, audio_data)
-        # This is exactly what process_audio expects, so we can pass it directly
-        detected_lang, transcription, translation = process_audio(audio)
-        return detected_lang, transcription, translation
-    except Exception as e:
-        print(f"Error in gradio_process_audio: {str(e)}")
-        return str(e), "", ""
 iface = gr.Interface(
-    fn=gradio_process_audio,
-    inputs=gr.Audio(type="numpy"),
-    outputs=[
-        gr.Textbox(label="Detected Language"),
-        gr.Textbox(label="Transcription", lines=5),
-        gr.Textbox(label="Translation", lines=5)
-    ],
-    title="Audio Transcription and Translation",
-    description="Upload an audio file to detect its language, transcribe, and translate it.",
-    allow_flagging="never",
-    css=".output-textbox { font-family: 'Noto Sans Devanagari', sans-serif; font-size: 18px; }"
 )
-if __name__ == "__main__":
-    iface.launch()

 import gradio as gr
+from audio_processing import process_audio, print_results
+def transcribe_audio(audio_file):
+    """Build a plain-text report of language changes and the transcript.
+
+    Args:
+        audio_file: Value supplied by the ``gr.Audio(type="filepath")`` input;
+            ``None`` when the user submitted without providing any audio.
+
+    Returns:
+        ``"No file uploaded"`` when ``audio_file`` is ``None``. Otherwise a
+        string listing each language segment (language, start, end) followed
+        by each final segment (start, end, language, speaker, text), as
+        returned by ``process_audio``.
+    """
+    # Gradio hands the callback None when nothing was uploaded; bail out
+    # early instead of passing None into process_audio.
+    if audio_file is None:
+        return "No file uploaded"
+
+    language_segments, final_segments = process_audio(audio_file)
+
+    # Collect the pieces and join once rather than growing a string with +=.
+    parts = ["Detected language changes:\n\n"]
+    for segment in language_segments:
+        parts.append(f"Language: {segment['language']}\n")
+        parts.append(f"Time: {segment['start']:.2f}s - {segment['end']:.2f}s\n\n")
+
+    parts.append("Transcription with language detection and speaker diarization:\n\n")
+    for segment in final_segments:
+        parts.append(
+            f"[{segment['start']:.2f}s - {segment['end']:.2f}s] ({segment['language']}) Speaker {segment['speaker']}: {segment['text']}\n"
+        )
+    return "".join(parts)
 iface = gr.Interface(
+    # Callback that turns the submitted audio into the text report.
+    fn=transcribe_audio,
+    # type="filepath": the callback receives a path to the audio file
+    # rather than raw sample data.
+    inputs=gr.Audio(type="filepath"),
+    outputs="text",
+    title="WhisperX Audio Transcription"
 )
+# NOTE(review): launch() runs unconditionally at module level; the removed
+# revision guarded it with `if __name__ == "__main__":`. Importing this
+# module will therefore start the UI — confirm that is intended.
+iface.launch()