"""Gradio front end that runs pyannote speaker diarization on a recording."""

import os

import gradio as gr
from pyannote.audio import Audio
from pyannote.audio import Pipeline
from pyannote.core import Segment
from pydub import AudioSegment

api_k = os.getenv("API_KEY")

# Initialize the pyannote pipeline for speaker diarization.
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token=api_k,
)


def _format_diarization(diarization):
    """Render a diarization result as one "speaker: start - end" line per turn."""
    lines = [
        f"{speaker}: {turn.start:.1f}s - {turn.end:.1f}s"
        for turn, _, speaker in diarization.itertracks(yield_label=True)
    ]
    if not lines:
        return "No speech detected."
    return "\n".join(lines)


def respond(audio_input):
    """Diarize the recorded audio and return the speaker turns as text.

    Args:
        audio_input: Path to the recorded audio file as supplied by the
            ``gr.Audio`` input (configured with ``type="filepath"``), or
            ``None`` when the user submitted without recording anything.

    Returns:
        A newline-separated list of speaker turns, or a short message when
        there is no audio or no speech was found.
    """
    # Gradio passes None when the form is submitted with no recording.
    if audio_input is None:
        return "No audio received. Please record something first."

    # Perform diarization on the whole file.
    diarization = pipeline(audio_input)
    return _format_diarization(diarization)


input_audio = gr.Audio(
    sources=["microphone"],
    # The pipeline is called with a file path, so ask Gradio for a path
    # instead of its default (sample_rate, numpy array) tuple.
    type="filepath",
    waveform_options=gr.WaveformOptions(
        waveform_color="#01C6FF",
        waveform_progress_color="#0066B4",
        skip_length=2,
        show_controls=False,
    ),
)

demo = gr.Interface(
    fn=respond,
    inputs=input_audio,
    outputs="text",
    title="Tommy Vercetti Chatbot",
    description="Chat with Tommy Vercetti from GTA Vice City. Get responses in both text and voice!",
)
demo.launch(debug=True)