import os
import tempfile

from openai import OpenAI
import gradio as gr
from gtts import gTTS
import librosa
from transformers import WhisperProcessor, WhisperForConditionalGeneration
# OpenRouter API setup. Read the key from the environment instead of
# hard-coding it; set OPENROUTER_API_KEY before launching the app.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.environ.get("OPENROUTER_API_KEY")
)
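# Note: pointing the OpenAI SDK at OpenRouter's base_url lets the same client
# talk to any OpenRouter-hosted model; only the model name passed to
# chat.completions.create below needs to change.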
# Load the Whisper model and processor once at startup so each request
# does not pay the model-loading cost again.
processor = WhisperProcessor.from_pretrained("openai/whisper-large-v3-turbo")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v3-turbo")

def voice_assistant(audio_filepath):
    if audio_filepath is None:
        return "Please record your question.", None

    # Load the recording and resample to 16 kHz, the rate Whisper expects
    audio_data, sample_rate = librosa.load(audio_filepath, sr=16000)

    # Transcribe speech to text with Whisper
    input_features = processor(audio_data, sampling_rate=sample_rate, return_tensors="pt").input_features
    predicted_ids = model.generate(input_features)
    user_voice = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

    # Generate the AI response via OpenRouter
    completion = client.chat.completions.create(
        model="qwen/qwen2.5-vl-32b-instruct:free",
        messages=[{"role": "user", "content": user_voice}]
    )
    ai_response = completion.choices[0].message.content

    # Convert the AI response to speech with gTTS and save it to a temp file
    tts = gTTS(ai_response, lang="en")
    temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    tts.save(temp_audio.name)

    # Return both the text and the audio file path
    return ai_response, temp_audio.name
# Gradio interface
iface = gr.Interface(
    fn=voice_assistant,
    inputs=gr.Audio(sources=["microphone"], type="filepath", label="Speak Your Question"),
    outputs=[gr.Textbox(label="AI Response"), gr.Audio(label="Voice Response")],
    title="AI Voice Assistant",
    description="Speak a question, and the AI will respond with text and voice output.",
    live=True
)
iface.launch()
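# Quick smoke test (a sketch: "question.wav" is a hypothetical local recording,
# and OPENROUTER_API_KEY must be set in the environment):
#   response_text, response_audio = voice_assistant("question.wav")
#   print(response_text)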