File size: 3,919 Bytes
bacd0f5
 
 
1145213
bacd0f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99b56c0
1145213
bacd0f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import gradio as gr
import groq
import io
import numpy as np
import soundfile as sf
import pyttsx3  # Text-to-speech conversion

# Module-level text-to-speech engine: created once when this module is
# imported and reused by convert_text_to_speech() for every synthesis call.
tts_engine = pyttsx3.init()

def transcribe_audio(audio, api_key):
    """Transcribe a recorded clip to text via Groq's Distil-Whisper model.

    Args:
        audio: Indexable pair whose item 0 is handed to soundfile as the
            sample rate and whose item 1 is the sample data, or ``None``
            when nothing was recorded.
        api_key: Groq API key used to construct the client.

    Returns:
        Whatever the transcription endpoint returns for
        ``response_format="text"``; ``""`` when ``audio`` is ``None``; or a
        string beginning with ``"Error in transcription: "`` if the API call
        raises.
    """
    # Nothing recorded: bail out before a client is even created.
    if audio is None:
        return ""

    groq_client = groq.Client(api_key=api_key)

    # Serialize the samples into an in-memory WAV so it can be uploaded
    # without touching the filesystem.
    sample_rate = audio[0]
    samples = audio[1]
    wav_stream = io.BytesIO()
    sf.write(wav_stream, samples, sample_rate, format='wav')
    wav_stream.seek(0)  # rewind so the upload reads from the first byte

    request_kwargs = {
        "model": "distil-whisper-large-v3-en",
        "file": ("audio.wav", wav_stream),
        "response_format": "text",
    }
    try:
        return groq_client.audio.transcriptions.create(**request_kwargs)
    except Exception as exc:
        # Failures are reported as text so they can be shown in the UI.
        return f"Error in transcription: {exc!s}"

def generate_response(transcription, api_key):
    """Ask the Groq-hosted Llama 3 70B model to reply to *transcription*.

    Args:
        transcription: Text sent as the single user message.
        api_key: Groq API key used to construct the client.

    Returns:
        The content of the first completion choice; a fixed "no
        transcription" notice when *transcription* is falsy; or a string
        beginning with ``"Error in response generation: "`` if the request
        (or reading its first choice) raises.
    """
    # Guard clause: there is nothing to send to the model.
    if not transcription:
        return "No transcription available. Please try speaking again."

    groq_client = groq.Client(api_key=api_key)
    chat_messages = [{"role": "user", "content": transcription}]

    try:
        reply = groq_client.chat.completions.create(
            model="llama3-70b-8192",
            messages=chat_messages,
        )
        # Kept inside the try so a reply without choices is also reported
        # as an error string instead of propagating.
        first_choice = reply.choices[0]
        return first_choice.message.content
    except Exception as exc:
        return f"Error in response generation: {exc!s}"

def convert_text_to_speech(text):
    """Synthesize *text* with the shared TTS engine and return the audio bytes.

    The module-level ``tts_engine`` renders the speech into the file
    ``response_output.wav`` (relative to the current working directory);
    that file is then read back in binary mode and its contents returned.
    """
    wav_path = 'response_output.wav'

    # Queue the save-to-file job, then run the engine until it is done.
    tts_engine.save_to_file(text, wav_path)
    tts_engine.runAndWait()

    with open(wav_path, "rb") as wav_file:
        return wav_file.read()

def process_audio(audio, api_key):
    """Run the whole pipeline: transcribe, generate a reply, synthesize speech.

    Args:
        audio: Recording from the audio input, forwarded unchanged to
            ``transcribe_audio``.
        api_key: Groq API key; must be non-empty.

    Returns:
        Always a 3-tuple ``(transcription, response, audio_output)`` so it
        lines up with the three output components wired to the click
        handler. ``audio_output`` is the synthesized speech, or ``None``
        whenever a step failed or there is nothing to speak.
    """
    # Prefixes of the error strings produced by transcribe_audio() and
    # generate_response(). Matching on the prefix (rather than the bare
    # substring "Error") keeps legitimate replies that merely mention the
    # word "Error" from being treated as failures.
    transcription_error_prefix = "Error in transcription:"
    response_error_prefix = "Error in response generation:"

    if not api_key:
        # Three values are required here as well; returning only two makes
        # the UI handler fail instead of showing this message.
        return "Please enter your Groq API key.", "API key is required.", None

    transcription = transcribe_audio(audio, api_key)
    if isinstance(transcription, str) and transcription.startswith(
        transcription_error_prefix
    ):
        # Do not send the error text to the model as if the user had said it.
        return transcription, "", None

    response = generate_response(transcription, api_key)
    if not isinstance(response, str) or not response.strip():
        # Nothing to speak (e.g. the model returned no content).
        return transcription, response or "", None
    if response.startswith(response_error_prefix):
        return transcription, response, None

    audio_output = convert_text_to_speech(response)
    return transcription, response, audio_output

# Custom CSS for the Gradio page: a light-grey container background,
# #f55036 colouring for primary and secondary buttons, and a fixed
# bottom-right position for the element with id "groq-badge" (the
# "POWERED BY GROQ" div rendered through gr.HTML below).
# The stylesheet only takes effect when it is handed to gr.Blocks via its
# `css` argument.
custom_css = """
.gradio-container {
    background-color: #f5f5f5;
}
.gr-button-primary {
    background-color: #f55036 !important;
    border-color: #f55036 !important;
}
.gr-button-secondary {
    color: #f55036 !important;
    border-color: #f55036 !important;
}
#groq-badge {
    position: fixed;
    bottom: 20px;
    right: 20px;
    z-index: 1000;
}
"""

# Gradio Interface
# `css=custom_css` applies the stylesheet defined above; without it the
# #groq-badge positioning and the button colours are never used.
with gr.Blocks(theme=gr.themes.Default(), css=custom_css) as demo:
    gr.Markdown("# 🎙️ Groq x Gradio Voice-Powered AI Assistant")

    # Masked input so the key is not shown on screen.
    api_key_input = gr.Textbox(type="password", label="Enter your Groq API Key")

    with gr.Row():
        # type="numpy" hands process_audio the recording as the indexable
        # pair that transcribe_audio() reads (sample rate, sample data).
        audio_input = gr.Audio(label="Speak!", type="numpy")

    with gr.Row():
        transcription_output = gr.Textbox(label="Transcription")
        response_output = gr.Textbox(label="AI Assistant Response")
        # "filepath" is one of the two documented values for `type`
        # ("numpy" / "filepath"); the previous "file" is not accepted.
        audio_output = gr.Audio(label="Voice Response", type="filepath")

    submit_button = gr.Button("Process", variant="primary")

    # Badge element targeted by the #groq-badge rule in custom_css.
    gr.HTML("""
    <div id="groq-badge">
        <div style="color: #f55036; font-weight: bold;">POWERED BY GROQ</div>
    </div>
    """)

    # process_audio must return one value per component listed in `outputs`.
    submit_button.click(
        process_audio,
        inputs=[audio_input, api_key_input],
        outputs=[transcription_output, response_output, audio_output]
    )

    gr.Markdown("""
    ## How to use this app:
    1. Enter your [Groq API Key](https://console.groq.com/keys) in the provided field.
    2. Click on the microphone icon and speak your message (or upload an audio file).
    3. Click the "Process" button to transcribe your speech and generate a response from our AI assistant.
    4. The transcription, AI assistant response, and voice response will appear.
    """)

demo.launch()