Update app.py
app.py
CHANGED
@@ -1,10 +1,11 @@
-
 import os
 import torch
 import whisper
 from gtts import gTTS
 import gradio as gr
-from groq import Groq
+from groq import Groq
+import numpy as np
+import io
 
 # Load the Whisper model
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -14,14 +15,21 @@ GROQ_API_KEY ="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"
 
 Client = Groq(api_key=GROQ_API_KEY)
 
-# Set your Groq API key
+# Set your Groq API key (replace with your actual key or set it in the environment)
 os.environ["GROQ_API_KEY"] = "your_groq_api_key_here"
 client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
 
 # Function to transcribe audio using Whisper
-def transcribe(audio_path):
+def transcribe(audio_data):
     try:
+        # Convert numpy array (audio) to bytes and save it as a temporary file
+        audio_path = "temp_audio.wav"
+        with open(audio_path, "wb") as f:
+            f.write(audio_data)
+
+        # Transcribe the saved audio file
         result = model.transcribe(audio_path)
+        os.remove(audio_path)  # Clean up the temporary file
         return result["text"]
     except Exception as e:
         return f"Error during transcription: {e}"
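Note on this hunk: with gr.Audio(type="numpy") (see the UI hunk below), Gradio passes audio as a (sample_rate, numpy_array) tuple rather than raw bytes, so f.write(audio_data) as committed would either raise or write a header-less file that Whisper cannot open. A minimal tuple-aware sketch of the same step; scipy and the "base" model size are assumptions, not taken from the file:

import os

import numpy as np
import torch
import whisper
from scipy.io import wavfile  # assumption: scipy is installed in the Space

device = "cuda" if torch.cuda.is_available() else "cpu"
model = whisper.load_model("base", device=device)  # model size is an assumption

def transcribe(audio_data):
    try:
        # gr.Audio(type="numpy") delivers (sample_rate, samples), not bytes
        sample_rate, samples = audio_data
        if samples.dtype != np.int16:
            # Gradio mic input is usually int16; scale floats in [-1, 1] if not
            samples = (samples * 32767).astype(np.int16)
        audio_path = "temp_audio.wav"
        # wavfile.write emits a valid WAV header; a bare f.write(samples) would not
        wavfile.write(audio_path, sample_rate, samples)
        result = model.transcribe(audio_path)
        os.remove(audio_path)  # clean up the temporary file
        return result["text"]
    except Exception as e:
        return f"Error during transcription: {e}"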
@@ -48,8 +56,8 @@ def text_to_speech(text):
         return f"Error during text-to-speech conversion: {e}"
 
 # Combined function for processing audio input and generating audio output
-def process_audio(audio_path):
-    transcription = transcribe(audio_path)
+def process_audio(audio_data):
+    transcription = transcribe(audio_data)
     if "Error" in transcription:
         return transcription, None, None
 
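The LLM and text-to-speech steps that process_audio calls fall between the hunks and are not shown. For context, a sketch of how those two helpers typically look with the groq SDK and gTTS; the helper names and the model id are illustrative assumptions, and only the text-to-speech error string comes from the file:

import os

from groq import Groq
from gtts import gTTS

client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

def generate_response(prompt):
    # Standard Groq chat-completion call (the SDK mirrors the OpenAI client)
    completion = client.chat.completions.create(
        model="llama3-8b-8192",  # illustrative model id, not confirmed by the diff
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content

def text_to_speech(text):
    try:
        tts = gTTS(text)
        out_path = "response_audio.mp3"
        tts.save(out_path)  # gTTS writes an mp3 that a gr.Audio output can play
        return out_path
    except Exception as e:
        return f"Error during text-to-speech conversion: {e}"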
@@ -68,7 +76,7 @@ with gr.Blocks() as app:
     gr.Markdown("## Real-Time Voice-to-Voice Chatbot")
     with gr.Row():
         with gr.Column():
-            audio_input = gr.Audio(type="filepath", label="Speak", interactive=True)
+            audio_input = gr.Audio(type="numpy", label="Speak", interactive=True)
         with gr.Column():
             transcription_output = gr.Textbox(label="Transcription (Text)", lines=2)
             response_output = gr.Textbox(label="Response (LLM Text)", lines=2)
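The diff ends before the event wiring. A runnable sketch of how a Blocks layout like this is usually connected; the stub handler, the Submit button, and the audio_output component are assumptions, while the other component names come from the file:

import gradio as gr

def process_audio(audio_data):
    # Stub standing in for the real pipeline so the wiring runs on its own
    return "transcription goes here", "LLM response goes here", None

with gr.Blocks() as app:
    gr.Markdown("## Real-Time Voice-to-Voice Chatbot")
    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(type="numpy", label="Speak", interactive=True)
        with gr.Column():
            transcription_output = gr.Textbox(label="Transcription (Text)", lines=2)
            response_output = gr.Textbox(label="Response (LLM Text)", lines=2)
            audio_output = gr.Audio(label="Response (Audio)")  # assumed third output
    submit_btn = gr.Button("Submit")  # assumed trigger; the file may wire it differently
    submit_btn.click(
        process_audio,
        inputs=audio_input,
        outputs=[transcription_output, response_output, audio_output],
    )

app.launch()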