cdnieto committed on
Commit
bd5f256
·
1 Parent(s): e7c9cb5

Add app, requirements and transcription script

Browse files
Files changed (3) hide show
  1. app.py +41 -0
  2. requirements.txt +5 -0
  3. transcription.py +60 -0
app.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ from transcription import process_audio
4
+
5
+
6
+ with gr.Blocks(theme=gr.themes.Default()) as app:
7
+ gr.Markdown("# 🎙️ Voice-Powered AI Assistant.")
8
+
9
+ api_key_input = gr.Textbox(type="password", label="Enter your Groq API Key")
10
+
11
+ with gr.Row():
12
+ audio_inputs = gr.Audio(label="Speak here", type="numpy")
13
+
14
+ with gr.Row():
15
+ transcription_output = gr.Textbox(label="Transcription")
16
+ response_output = gr.Textbox(label="AI Assistant Response")
17
+
18
+ submit_button = gr.Button("Process", variant="primary")
19
+
20
+ gr.HTML("""
21
+ <div id="groq-badge">
22
+ <div style="color: #f55036; font-weight: bold;">POWERED BY GROQ</div>
23
+ </div>
24
+ """)
25
+
26
+ submit_button.click(
27
+ process_audio,
28
+ inputs=[audio_inputs, api_key_input],
29
+ outputs=[transcription_output, response_output]
30
+ )
31
+
32
+ gr.Markdown("""
33
+ ## How to use this app:
34
+ 1. Enter your Groq API Key in the provided field.
35
+ 2. Click on the microphone icon and speak your message (or forever hold your peace)! You can also provide a supported audio file. Supported audio files include mp3, mp4, mpeg, mpga, m4a, wav, and webm file types.
36
+ 3. Click the "Process" button to transcribe your speech and generate a response from our AI assistant.
37
+ 4. The transcription and AI assistant response will appear in the respective text boxes.
38
+
39
+ """)
40
+
41
+ app.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ -i https://pypi.org/simple
2
+ gradio==4.44.0
3
+ groq==0.11.0
4
+ numpy==2.1.1
5
+ soundfile==0.12.1
transcription.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import groq
3
+ import numpy as np
4
+ import soundfile as sf
5
+
6
+
7
def transcribe_audio(audio, api_key):
    """Transcribe recorded audio with Groq's Distil-Whisper model.

    Args:
        audio: Tuple of (sample_rate, samples) as produced by
            ``gr.Audio(type="numpy")``, or ``None`` if nothing was recorded.
        api_key: Groq API key used to authenticate the request.

    Returns:
        The transcription text on success, ``""`` when *audio* is ``None``,
        or a human-readable error message string on failure.
    """
    if audio is None:
        return ""

    client = groq.Client(api_key=api_key)

    # Re-encode the raw samples as an in-memory WAV file; the Groq API
    # expects a named file-like object, not a bare numpy array.
    # (The previous version also np.save()'d the samples into a second
    # buffer that was never used — dead code, removed.)
    sample_rate, samples = audio
    buffer = io.BytesIO()
    sf.write(buffer, samples, sample_rate, format='wav')
    buffer.seek(0)

    try:
        # Use Distil-Whisper English powered by Groq for transcription.
        completion = client.audio.transcriptions.create(
            model="distil-whisper-large-v3-en",
            file=("audio.wav", buffer),
            response_format="text"
        )
        return completion

    except Exception as e:
        # Best-effort UI: surface the error as text instead of crashing.
        return f"Error in transcription: {e}"
33
+
34
+
35
def generate_response(transcription, api_key):
    """Ask Groq's Llama 3 model for an assistant reply to *transcription*.

    Args:
        transcription: The user's transcribed speech; falsy values short-circuit.
        api_key: Groq API key used to authenticate the request.

    Returns:
        The model's reply text, a prompt to retry when *transcription* is
        empty, or a human-readable error message string on failure.
    """
    # Guard clause: nothing to answer if transcription failed or was empty.
    if not transcription:
        return "No transcription available. Please try speaking again."

    client = groq.Client(api_key=api_key)

    try:
        chat_completion = client.chat.completions.create(
            model="llama3-70b-8192",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": transcription},
            ],
        )
        return chat_completion.choices[0].message.content

    except Exception as e:
        # Best-effort UI: surface the error as text instead of crashing.
        return f"Error in response generation: {e}"
53
+
54
+
55
def process_audio(audio, api_key):
    """Run the full pipeline: transcribe *audio*, then generate a reply.

    Args:
        audio: Tuple of (sample_rate, samples) from the Gradio widget, or None.
        api_key: Groq API key; when falsy, both steps are skipped.

    Returns:
        A (transcription, response) pair of strings for the two output boxes.
    """
    # Without a key neither API call can succeed, so report that up front.
    if not api_key:
        return "Please enter your Groq API key.", "API key is required."

    transcribed_text = transcribe_audio(audio, api_key)
    assistant_reply = generate_response(transcribed_text, api_key)
    return transcribed_text, assistant_reply