Spaces:

ajchri5
/

Assignment-2-IT164_ajchri5

Runtime error

App Files Files Community

ajchri5 committed on Nov 19, 2024

Commit

e4528d2

verified ·

1 Parent(s): 1ee60bb

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -8

app.py CHANGED Viewed

@@ -2,22 +2,29 @@ import re
 import gradio as gr
 from transformers import WhisperProcessor, WhisperForConditionalGeneration, pipeline
 import torch
 # Load Whisper model for transcription
-whisper_model_name = "openai/whisper-large"  # You can use base, small, medium, or large based on your needs
 processor = WhisperProcessor.from_pretrained(whisper_model_name)
 model = WhisperForConditionalGeneration.from_pretrained(whisper_model_name)
-# Initialize the language detection model (using zero-shot classification for language detection)
 lang_detect_model = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
 # Function to transcribe audio to text using Whisper model
 def transcribe_audio(audio_file):
     # Prepare input features for Whisper
-    input_features = processor(audio_file, return_tensors="pt")
     # Generate transcription
     generated_ids = model.generate(input_features["input_features"])
     transcription = processor.decode(generated_ids[0], skip_special_tokens=True)
     return transcription
 # Function to detect the language of the transcription using zero-shot classification
@@ -39,16 +46,25 @@ def cleanup_text(text):
 # Main function to process the audio and detect language
 def process_audio(audio_file):
-    transcription = transcribe_audio(audio_file)  # Transcribe audio to text
-    lang, score = detect_language(transcription)  # Detect the language of the transcription
-    cleaned_text = cleanup_text(transcription)  # Clean up the transcription
-    return cleaned_text, lang, score  # Return cleaned transcription, language, and confidence score
 # Gradio interface
 with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
-            audio_input = gr.Audio(label="Record your voice", type="filepath", scale=1)  # Input for audio file
             output_text = gr.Textbox(label="Transcription", scale=1)  # Output text for transcription
             output_lang = gr.Textbox(label="Detected Language", scale=1)  # Output text for detected language
             output_score = gr.Textbox(label="Confidence Score", scale=1)  # Output confidence score

 import gradio as gr
 from transformers import WhisperProcessor, WhisperForConditionalGeneration, pipeline
 import torch
+import numpy as np
+import librosa
 # Load Whisper model for transcription
+whisper_model_name = "openai/whisper-large"
 processor = WhisperProcessor.from_pretrained(whisper_model_name)
 model = WhisperForConditionalGeneration.from_pretrained(whisper_model_name)
+# Initialize the language detection model
 lang_detect_model = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
 # Function to transcribe audio to text using Whisper model
 def transcribe_audio(audio_file):
+    # Ensure the audio is a numpy array (Gradio input type for audio is numpy)
+    audio = np.array(audio_file)
     # Prepare input features for Whisper
+    input_features = processor(audio, return_tensors="pt", sampling_rate=16000)
     # Generate transcription
     generated_ids = model.generate(input_features["input_features"])
     transcription = processor.decode(generated_ids[0], skip_special_tokens=True)
     return transcription
 # Function to detect the language of the transcription using zero-shot classification
 # Main function to process the audio and detect language
 def process_audio(audio_file):
+    """Transcribe audio, detect its language, and clean up the text.
+
+    Calls ``transcribe_audio`` on ``audio_file``, then passes the
+    transcription to ``detect_language`` and ``cleanup_text`` (both defined
+    outside this hunk).
+
+    Returns:
+        A 3-tuple ``(cleaned_text, lang, score)`` on success. If anything
+        raises, including the ``ValueError`` raised here for an empty or
+        whitespace-only transcription, the tuple is
+        ``("Error: <message>", "", "")`` instead, so the error message is
+        returned in the first slot rather than propagated.
+    """
+    try:
+        transcription = transcribe_audio(audio_file)  # Transcribe audio to text
+        if not transcription.strip():  # If transcription is empty or just whitespace
+            raise ValueError("Transcription is empty.")
+        lang, score = detect_language(transcription)  # Detect the language of the transcription
+        cleaned_text = cleanup_text(transcription)  # Clean up the transcription
+        return cleaned_text, lang, score  # Return cleaned transcription, language, and confidence score
+    except Exception as e:
+        # If any error occurs, return the error message
+        return f"Error: {str(e)}", "", ""
 # Gradio interface
 with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
+            audio_input = gr.Audio(label="Record your voice", type="numpy", scale=1)  # Input for live audio (microphone)
             output_text = gr.Textbox(label="Transcription", scale=1)  # Output text for transcription
             output_lang = gr.Textbox(label="Detected Language", scale=1)  # Output text for detected language
             output_score = gr.Textbox(label="Confidence Score", scale=1)  # Output confidence score