Spaces:
Sleeping
Sleeping
File size: 2,026 Bytes
94dd3ca e779dd7 94dd3ca 9150a7c 94dd3ca 9150a7c 94dd3ca e779dd7 94dd3ca 9150a7c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import os
import whisper
from groq import Groq
from gtts import gTTS
import tempfile
import gradio as gr
# Step 1: Set up Whisper for transcription.
# The "base" model is loaded once, at module import, and the resulting object
# is reused by every transcribe_audio() call below.
# NOTE(review): presumably this requires the `openai-whisper` distribution
# (which provides `whisper.load_model`) — confirm against the deployment's
# requirements file.
model = whisper.load_model("base") # Ensure correct version of whisper is installed
# Function to transcribe audio using Whisper
def transcribe_audio(audio_file):
    """Transcribe an audio file with the module-level Whisper model.

    Args:
        audio_file: The audio input handed to ``model.transcribe`` (the
            Gradio interface in this file supplies a file path).

    Returns:
        The ``"text"`` entry of the transcription result.
    """
    return model.transcribe(audio_file)["text"]
# Step 2: Set up Groq API for interacting with the LLM (e.g., Llama 3)
# Read the key from the GROQ_API_KEY environment variable so the real secret
# never has to be committed to source control. The original placeholder is
# kept only as a fallback for backward compatibility; it is not a usable key,
# so either set the variable or replace the placeholder before running.
api_key = os.environ.get("GROQ_API_KEY", "your_groq_api_key")
client = Groq(api_key=api_key)
# Function to get a response from the Groq LLM (Llama 3)
def get_groq_response(text, model="llama3-8b-8192"):
    """Send ``text`` to the Groq chat-completions API and return the reply.

    Args:
        text: The user message to send as a single-turn conversation.
        model: Identifier of the Groq-hosted model to query. Defaults to the
            model this script has always used, so existing callers that pass
            only ``text`` behave exactly as before.

    Returns:
        The content of the first choice's message in the completion.
    """
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": text}],
        model=model,
        stream=False,  # wait for the full reply instead of streaming chunks
    )
    return chat_completion.choices[0].message.content
# Step 3: Convert text response from LLM to speech using GTTS
def text_to_speech(text):
    """Synthesize ``text`` with gTTS and return the path of the saved audio.

    Args:
        text: The text to convert to speech.

    Returns:
        Path of a temporary ``.mp3`` file holding the synthesized speech. The
        file is created with ``delete=False`` so it outlives this call (Gradio
        reads it afterwards); nothing in this function removes it.
    """
    tts = gTTS(text)
    # Reserve a unique path, then close our own handle *before* gTTS writes to
    # the file by name. Leaving the handle open leaked one file descriptor per
    # request and prevents re-opening the path on platforms that lock open
    # files. The ".mp3" suffix matches the format gTTS writes.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
        audio_path = temp_file.name
    tts.save(audio_path)
    # Return the file path for Gradio to play the audio
    return audio_path
# Step 4: Integrate everything into a Gradio interface
def voice_chatbot(audio_input):
    """Run one voice turn: speech in -> transcript -> LLM reply -> speech out.

    Args:
        audio_input: Path of the recorded/uploaded audio file, or ``None`` /
            empty when no audio is available.

    Returns:
        Path of the audio file containing the spoken reply, or ``None`` when
        there is nothing to answer (no audio, an empty transcript, or an empty
        LLM reply).
    """
    # The handler can be invoked without audio (for example when the input is
    # cleared); there is nothing to transcribe in that case.
    if not audio_input:
        return None

    # Step 1: Transcribe the audio using Whisper
    transcription = transcribe_audio(audio_input)
    # Silence or noise can yield a blank transcript; skip the LLM round-trip.
    if not transcription or not transcription.strip():
        return None

    # Step 2: Get response from Groq API using the transcription
    response = get_groq_response(transcription)
    # gTTS rejects empty text, so do not attempt to synthesize an empty reply.
    if not response or not response.strip():
        return None

    # Step 3: Convert the response text to speech using GTTS and return the
    # resulting file path for Gradio to play.
    return text_to_speech(response)
# Step 5: Create the Gradio interface (microphone input and audio output)
# Input component: the handler receives the audio as a file path. No
# source="microphone" argument is passed.
_audio_input = gr.Audio(type="filepath")
# Output component: plays the audio file path returned by voice_chatbot.
_audio_output = gr.Audio()

iface = gr.Interface(
    fn=voice_chatbot,
    inputs=_audio_input,
    outputs=_audio_output,
    live=True,
)

# Launch the Gradio interface
iface.launch()
|