cdnieto committed on
Commit
bd5f256
·
1 Parent(s): e7c9cb5

Add app, requirements and transcription script

Browse files
Files changed (3) hide show
  1. app.py +41 -0
  2. requirements.txt +5 -0
  3. transcription.py +60 -0
app.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ from transcription import process_audio
4
+
5
+
6
+ with gr.Blocks(theme=gr.themes.Default()) as app:
7
+ gr.Markdown("# 🎙️ Voice-Powered AI Assistant.")
8
+
9
+ api_key_input = gr.Textbox(type="password", label="Enter your Groq API Key")
10
+
11
+ with gr.Row():
12
+ audio_inputs = gr.Audio(label="Speak here", type="numpy")
13
+
14
+ with gr.Row():
15
+ transcription_output = gr.Textbox(label="Transcription")
16
+ response_output = gr.Textbox(label="AI Assistant Response")
17
+
18
+ submit_button = gr.Button("Process", variant="primary")
19
+
20
+ gr.HTML("""
21
+ <div id="groq-badge">
22
+ <div style="color: #f55036; font-weight: bold;">POWERED BY GROQ</div>
23
+ </div>
24
+ """)
25
+
26
+ submit_button.click(
27
+ process_audio,
28
+ inputs=[audio_inputs, api_key_input],
29
+ outputs=[transcription_output, response_output]
30
+ )
31
+
32
+ gr.Markdown("""
33
+ ## How to use this app:
34
+ 1. Enter your Groq API Key in the provided field.
35
+ 2. Click on the microphone icon and speak your message (or forever hold your peace)! You can also provide a supported audio file. Supported audio files include mp3, mp4, mpeg, mpga, m4a, wav, and webm file types.
36
+ 3. Click the "Process" button to transcribe your speech and generate a response from our AI assistant.
37
+ 4. The transcription and AI assistant response will appear in the respective text boxes.
38
+
39
+ """)
40
+
41
+ app.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ -i https://pypi.org/simple
2
+ gradio==4.44.0
3
+ groq==0.11.0
4
+ numpy==2.1.1
5
+ soundfile==0.12.1
transcription.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import groq
3
+ import numpy as np
4
+ import soundfile as sf
5
+
6
+
7
def transcribe_audio(audio, api_key):
    """Transcribe recorded audio with Groq's Distil-Whisper model.

    Args:
        audio: Tuple of (sample_rate, samples) as produced by
            ``gr.Audio(type="numpy")``, or ``None`` if nothing was recorded.
        api_key: Groq API key used to authenticate the request.

    Returns:
        The transcription text on success, ``""`` when *audio* is ``None``,
        or a human-readable error message string on failure.
    """
    if audio is None:
        return ""

    client = groq.Client(api_key=api_key)

    # Re-encode the raw samples as an in-memory WAV file; the Groq API
    # expects a named file-like object, not a bare numpy array.
    # (The previous version also np.save()'d the samples into a second
    # buffer that was never used — dead code, removed.)
    sample_rate, samples = audio
    buffer = io.BytesIO()
    sf.write(buffer, samples, sample_rate, format='wav')
    buffer.seek(0)

    try:
        # Use Distil-Whisper English powered by Groq for transcription.
        completion = client.audio.transcriptions.create(
            model="distil-whisper-large-v3-en",
            file=("audio.wav", buffer),
            response_format="text"
        )
        return completion

    except Exception as e:
        # Best-effort UI: surface the error as text instead of crashing.
        return f"Error in transcription: {e}"
33
+
34
+
35
def generate_response(transcription, api_key):
    """Ask Groq's Llama 3 model for an assistant reply to *transcription*.

    Args:
        transcription: The user's transcribed speech; falsy values short-circuit.
        api_key: Groq API key used to authenticate the request.

    Returns:
        The model's reply text, a prompt to retry when *transcription* is
        empty, or a human-readable error message string on failure.
    """
    # Guard clause: nothing to answer if transcription failed or was empty.
    if not transcription:
        return "No transcription available. Please try speaking again."

    client = groq.Client(api_key=api_key)

    try:
        chat_completion = client.chat.completions.create(
            model="llama3-70b-8192",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": transcription},
            ],
        )
        return chat_completion.choices[0].message.content

    except Exception as e:
        # Best-effort UI: surface the error as text instead of crashing.
        return f"Error in response generation: {e}"
53
+
54
+
55
def process_audio(audio, api_key):
    """Run the full pipeline: transcribe *audio*, then generate a reply.

    Args:
        audio: Tuple of (sample_rate, samples) from the Gradio widget, or None.
        api_key: Groq API key; when falsy, both steps are skipped.

    Returns:
        A (transcription, response) pair of strings for the two output boxes.
    """
    # Without a key neither API call can succeed, so report that up front.
    if not api_key:
        return "Please enter your Groq API key.", "API key is required."

    transcribed_text = transcribe_audio(audio, api_key)
    assistant_reply = generate_response(transcribed_text, api_key)
    return transcribed_text, assistant_reply