import logging

import gradio as gr
import pandas as pd
import whisper  # Library for speech recognition
from transformers import pipeline

logger = logging.getLogger(__name__)

# Load the Whisper model for speech recognition
whisper_model = whisper.load_model("base")

# Load the summarization model from Hugging Face
summarization = pipeline("summarization", model="google/pegasus-large")


def process_audio(audio_file, min_length, max_length):
    """Transcribe an audio file and summarize the transcript.

    The transcript and summary are also written to ``results.csv`` in the
    current working directory, replacing any previous contents of that file.

    Args:
        audio_file: Path to the uploaded audio file, or ``None`` when nothing
            was uploaded (the Gradio input below uses ``type="filepath"``).
        min_length: Minimum summary length passed to the summarizer. Coerced
            to ``int`` because slider values may arrive as floats.
        max_length: Maximum summary length passed to the summarizer. Coerced
            to ``int`` for the same reason.

    Returns:
        A ``(transcript, summary)`` tuple of strings. If any step fails, both
        elements hold the same ``"An error occurred: ..."`` message so the
        two output textboxes show the failure instead of the app crashing.
    """
    try:
        # Ensure audio_file is not None and has valid content
        if audio_file is None:
            raise ValueError("No audio file provided.")

        # Use the Whisper model to transcribe the audio file into text
        result = whisper_model.transcribe(audio_file)
        # Strip so a whitespace-only transcript is treated as empty below.
        text = result["text"].strip()

        # Check if transcription was successful
        if not text:
            raise ValueError(
                "Failed to transcribe the audio. The transcription result is empty."
            )

        # Use the summarization pipeline to summarize the transcribed text.
        summary_result = summarization(
            text,
            # Generation lengths must be integers; sliders can yield floats.
            min_length=int(min_length),
            max_length=int(max_length),
            # Truncate transcripts longer than the model's input limit
            # instead of failing on them.
            truncation=True,
        )
        summary = summary_result[0]["summary_text"]

        # Check if summarization was successful
        if not summary:
            raise ValueError(
                "Failed to summarize the transcript. The summary result is empty."
            )

        # Create a DataFrame to store the audio file, transcript, and summary
        df_results = pd.DataFrame(
            {
                "Audio File": [audio_file],  # Path to the audio file
                "Transcript": [text],  # Transcribed text
                "Summary": [summary],  # Generated summary
            }
        )

        # Save the results to a CSV file named "results.csv"
        df_results.to_csv("results.csv", index=False)

        # Return the transcript and summary to be displayed in the Gradio interface
        return text, summary

    except Exception as e:
        # UI boundary: keep the traceback in the server log, and show a
        # readable message in both output fields rather than raising.
        logger.exception("Audio processing failed")
        error_message = f"An error occurred: {str(e)}"
        return error_message, error_message


# Create a Gradio interface
iface = gr.Interface(
    fn=process_audio,  # The function to be called when processing the input
    inputs=[
        # Audio input field for file upload
        gr.Audio(sources="upload", type="filepath", label="Upload your audio file"),
        # Sliders for the summary length bounds. An explicit step of 1 keeps
        # them on whole numbers, which is what the summarizer expects.
        gr.Slider(
            minimum=10, maximum=50, value=30, step=1, label="Minimum Summary Length"
        ),
        gr.Slider(
            minimum=50, maximum=600, value=100, step=1, label="Maximum Summary Length"
        ),
    ],
    outputs=[
        gr.Textbox(label="Transcript"),  # Textbox for displaying the transcript
        gr.Textbox(label="Summary"),  # Textbox for displaying the summary
    ],
    title="Audio to Summarized Transcript",  # Title of the app
    description=(
        "Upload an audio file and adjust summary length to get both the "
        "transcript and summary."
    ),  # Description of the app
)

# Launch the app
iface.launch()