Spaces:
Running
Running
File size: 2,079 Bytes
896d8a0 f152a90 896d8a0 76084c0 896d8a0 76084c0 b207a62 896d8a0 bfc5bba 896d8a0 b207a62 896d8a0 f152a90 896d8a0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import functools
import os
import tempfile

import gradio as gr
import librosa
import numpy as np
import torch
from gtts import gTTS
from openai import OpenAI
from transformers import WhisperProcessor, WhisperForConditionalGeneration
# OpenRouter API Setup
# The API key is read from the environment so that no credential lives in the
# source file. SECURITY: a literal key used to be committed on this line; treat
# that key as compromised and revoke it with the provider.
_openrouter_api_key = os.environ.get("OPENROUTER_API_KEY")
if not _openrouter_api_key:
    # Fail at startup with a clear message instead of failing on the first
    # chat request with an authentication error.
    raise RuntimeError(
        "OPENROUTER_API_KEY is not set; export it (or configure it as a "
        "secret of the hosting environment) before starting the app."
    )

# OpenAI-compatible client pointed at the OpenRouter endpoint.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=_openrouter_api_key,
)
@functools.lru_cache(maxsize=1)
def _load_whisper():
    """Load the Whisper processor and model once and reuse them on later calls."""
    model_id = "openai/whisper-large-v3-turbo"
    processor = WhisperProcessor.from_pretrained(model_id)
    model = WhisperForConditionalGeneration.from_pretrained(model_id)
    return processor, model


def voice_assistant(audio_filepath):
    """Transcribe a recorded question, ask the chat model, and speak the answer.

    Args:
        audio_filepath: Path to the recorded audio file, or ``None`` when
            nothing has been recorded yet.

    Returns:
        A ``(text, audio_path)`` tuple. ``text`` is the model's reply (or a
        short notice when there is nothing to answer); ``audio_path`` is the
        path of an MP3 file containing the spoken reply, or ``None`` when no
        speech was generated.
    """
    if audio_filepath is None:
        return "Please record your question.", None

    # Loading the checkpoint is expensive; the cached helper does it only on
    # the first request instead of on every call.
    processor, model = _load_whisper()

    # Load audio data using librosa (requested at a 16 kHz sample rate).
    audio_data, sample_rate = librosa.load(audio_filepath, sr=16000)

    # Convert audio to text using Whisper.
    input_features = processor(
        audio_data, sampling_rate=sample_rate, return_tensors="pt"
    ).input_features
    predicted_ids = model.generate(input_features)
    user_voice = processor.batch_decode(
        predicted_ids, skip_special_tokens=True
    )[0].strip()

    # Silence or unintelligible audio can decode to an empty string; do not
    # send an empty prompt to the chat model.
    if not user_voice:
        return "Sorry, I could not hear a question. Please try again.", None

    # Generate AI response using OpenRouter.
    completion = client.chat.completions.create(
        model="qwen/qwen2.5-vl-32b-instruct:free",
        messages=[{"role": "user", "content": user_voice}],
    )
    ai_response = completion.choices[0].message.content or ""

    # gTTS refuses empty text, so bail out before synthesis in that case.
    if not ai_response.strip():
        return "The assistant returned an empty response. Please try again.", None

    # Reserve a temp path and close the handle right away: this releases the
    # file descriptor and lets gTTS re-open the file by name on every platform.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
        speech_path = temp_audio.name

    # Convert AI response to speech using gTTS.
    gTTS(ai_response, lang="en").save(speech_path)

    return ai_response, speech_path  # both text and audio
# Gradio Interface
# Build the widgets first, then wire them to the handler.
_question_input = gr.Audio(
    sources=["microphone"],
    type="filepath",
    label="Speak Your Question",
)
_answer_text = gr.Textbox(label="AI Response")
_answer_speech = gr.Audio(label="Voice Response")

# voice_assistant returns (text, audio_path), matching the two outputs in order.
iface = gr.Interface(
    fn=voice_assistant,
    inputs=_question_input,
    outputs=[_answer_text, _answer_speech],
    title="AI Voice Assistant",
    description="Speak or type a question, and the AI will respond with voice output.",
    live=True,  # NOTE(review): presumably re-runs the handler on input change — confirm in Gradio docs
)

# Start the web app.
iface.launch()
|