|
import gradio as gr |
|
import os |
|
from pyannote.audio import Pipeline |
|
from pyannote.core import Segment |
|
from pyannote.audio import Audio |
|
from pydub import AudioSegment |
|
|
|
# Auth token for downloading the pretrained pipeline, taken from the API_KEY
# environment variable (None when the variable is not set).
api_k = os.environ.get("API_KEY")

# Load the speaker-diarization pipeline once, at import time, so every
# request handled by the app reuses the same instance.
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token=api_k,
)
|
|
|
|
|
def _as_pipeline_input(audio_input):
    """Convert a Gradio audio value into an input the diarization pipeline accepts.

    A ``(sample_rate, samples)`` tuple (Gradio's in-memory audio format) is
    turned into pyannote's ``{"waveform": ..., "sample_rate": ...}`` mapping;
    any other value (e.g. a file path) is passed through unchanged.
    """
    if not isinstance(audio_input, tuple):
        return audio_input

    # Imported lazily: only needed for in-memory audio.
    import torch

    sample_rate, samples = audio_input
    waveform = torch.as_tensor(samples)
    if waveform.is_floating_point():
        waveform = waveform.to(torch.float32)
    else:
        # Integer PCM samples: rescale to floats in [-1, 1].
        scale = float(torch.iinfo(waveform.dtype).max)
        waveform = waveform.to(torch.float32) / scale

    # Gradio gives (samples,) or (samples, channels); pyannote expects
    # (channels, samples).
    if waveform.dim() == 1:
        waveform = waveform.unsqueeze(0)
    else:
        waveform = waveform.transpose(0, 1).contiguous()

    return {"waveform": waveform, "sample_rate": int(sample_rate)}


def _format_diarization(diarization):
    """Render a diarization result as one ``start - end: speaker`` line per turn."""
    lines = [
        f"{turn.start:.1f}s - {turn.end:.1f}s: {speaker}"
        for turn, _, speaker in diarization.itertracks(yield_label=True)
    ]
    return "\n".join(lines) if lines else "No speech detected."


def respond(audio_input):
    """Run speaker diarization on the submitted audio and describe it as text.

    Args:
        audio_input: The value supplied by the audio input component: a file
            path, a ``(sample_rate, samples)`` tuple, or ``None`` when no
            audio was provided.

    Returns:
        A human-readable string listing who spoke when, or a short message
        when there is no audio or no speech was found.
    """
    if audio_input is None:
        return "No audio received. Please record something and try again."

    diarization = pipeline(_as_pipeline_input(audio_input))
    return _format_diarization(diarization)
|
|
|
# Microphone-only audio input with a custom-coloured waveform display.
input_audio = gr.Audio(
    sources=["microphone"],
    # Hand the recording to the callback as a path to an audio file. The
    # default ("numpy") would pass a (sample_rate, array) tuple instead, and
    # the callback forwards its argument straight to the pyannote pipeline,
    # which takes an audio file rather than that tuple.
    type="filepath",
    waveform_options=gr.WaveformOptions(
        waveform_color="#01C6FF",
        waveform_progress_color="#0066B4",
        skip_length=2,
        show_controls=False,
    ),
)
|
|
|
# Build the web UI: one audio input wired to `respond`, one text output.
demo = gr.Interface(
    fn=respond,
    inputs=input_audio,
    outputs="text",
    title="Tommy Vercetti Chatbot",
    description="Chat with Tommy Vercetti from GTA Vice City. Get responses in both text and voice!",
)

# Start the app server with debug output enabled.
demo.launch(debug=True)
|
|