# LumiVoice / app.py
import os
import tempfile

import gradio as gr
import librosa
import torch
from gtts import gTTS
from openai import OpenAI
from transformers import WhisperProcessor, WhisperForConditionalGeneration

# OpenRouter API setup: read the key from the environment rather than
# hard-coding it, since a key committed to source control is compromised.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.environ["OPENROUTER_API_KEY"],
)
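# For a local run, export the key first (shell, not Python):
#   export OPENROUTER_API_KEY="sk-or-..."
# On Hugging Face Spaces, the same value can be stored as a repository secret,
# which is exposed to the app as an environment variable.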
# Load the Whisper model and processor once at import time, so they are not
# re-initialized on every request.
processor = WhisperProcessor.from_pretrained("openai/whisper-large-v3-turbo")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v3-turbo")


def voice_assistant(audio_filepath):
    if audio_filepath is None:
        return "Please record your question.", None

    # Load the recording and resample to the 16 kHz rate Whisper expects
    audio_data, sample_rate = librosa.load(audio_filepath, sr=16000)

    # Transcribe the audio to text with Whisper
    input_features = processor(audio_data, sampling_rate=sample_rate, return_tensors="pt").input_features
    with torch.no_grad():
        predicted_ids = model.generate(input_features)
    user_voice = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
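    # Whisper auto-detects the spoken language above. As an optional sketch,
    # transcription could be pinned to English by forcing the decoder prompt:
    #   forced_ids = processor.get_decoder_prompt_ids(language="english", task="transcribe")
    #   predicted_ids = model.generate(input_features, forced_decoder_ids=forced_ids)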
    # Generate an AI response via OpenRouter
    completion = client.chat.completions.create(
        model="qwen/qwen2.5-vl-32b-instruct:free",
        messages=[{"role": "user", "content": user_voice}],
    )
    ai_response = completion.choices[0].message.content
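    # The request above is a single stateless turn. A minimal sketch of keeping
    # context, assuming a `history` list of prior {"role", "content"} dicts
    # maintained elsewhere (hypothetical, not defined in this app):
    #   messages = [{"role": "system", "content": "You are a concise voice assistant."}]
    #   messages += history + [{"role": "user", "content": user_voice}]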
    # Convert the AI response to speech with gTTS
    tts = gTTS(ai_response, lang="en")
    temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    tts.save(temp_audio.name)
    temp_audio.close()  # close the handle so the file is readable on all platforms

    return ai_response, temp_audio.name  # return both the text and the audio file path
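# Note that gTTS synthesizes speech by calling Google's online TTS endpoint,
# so the app needs outbound network access at request time.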
# Gradio interface: the pipeline runs once per submitted recording, since the
# transcription and API calls are too expensive to re-run on every input change.
iface = gr.Interface(
    fn=voice_assistant,
    inputs=gr.Audio(sources=["microphone"], type="filepath", label="Speak Your Question"),
    outputs=[gr.Textbox(label="AI Response"), gr.Audio(label="Voice Response")],
    title="AI Voice Assistant",
    description="Record a question and the AI will respond with text and voice output.",
)

if __name__ == "__main__":
    iface.launch()
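# When running outside Hugging Face Spaces, a temporary public URL can be
# requested with Gradio's share flag, e.g.:
#   iface.launch(share=True)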