walaa2022 committed on
Commit
2925d63
·
verified ·
1 Parent(s): 58fb228

Create App.py

Browse files
Files changed (1) hide show
  1. App.py +60 -0
App.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import whisper
4
+ #import sounddevice as sd
5
+ import numpy as np
6
+ from scipy.io.wavfile import write
7
+ from gtts import gTTS
8
+ from io import BytesIO
9
+ from groq import Groq
10
+ import tempfile
11
+
12
# Initialize the Groq client.
# SECURITY: the API key is read from the environment (for example a Space
# secret named GROQ_API_KEY) so that no credential lives in the repository.
# Any key that was ever committed here must be treated as leaked and revoked.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)

# Load the Whisper model once at start-up so every request reuses it.
# Other model sizes such as "small", "medium" or "large" can be substituted.
model = whisper.load_model("base")
18
+
19
def process_audio(file_path):
    """Transcribe a recording, ask the chat model for a reply, and voice it.

    Args:
        file_path: Path to the recorded audio file handed over by the audio
            input component. May be empty/None when nothing was recorded.

    Returns:
        A ``(text, audio_path)`` tuple. On success ``text`` is the model's
        reply and ``audio_path`` is the path of an MP3 file containing the
        spoken reply. When there is no input, or any step fails, ``text`` is
        a human-readable message and ``audio_path`` is ``None``.
    """
    # Nothing to transcribe: answer immediately instead of letting the audio
    # loader fail on an empty path.
    if not file_path:
        return "No audio input received.", None

    try:
        # Load the audio file and transcribe it with Whisper.
        audio = whisper.load_audio(file_path)
        result = model.transcribe(audio)
        text = result["text"]

        # Generate a response using Groq.
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": text}],
            model="llama3-8b-8192",  # Replace with the correct model if necessary
        )

        # Access the response using dot notation.
        response_message = chat_completion.choices[0].message.content.strip()

        # Convert the response text to speech and write it straight into a
        # uniquely named temporary file, so simultaneous requests never
        # overwrite each other's audio. delete=False keeps the file on disk
        # after the handle closes so the caller can still read it.
        tts = gTTS(response_message)
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_file:
            tts.write_to_fp(audio_file)
            response_audio_path = audio_file.name

        # Return the response text and the path to the saved audio file.
        return response_message, response_audio_path

    except Exception as e:
        # UI boundary: report the failure in the text output rather than
        # crashing the request.
        return f"An error occurred: {e}", None
52
+
53
# Input component: hands the recording to the callback as a file path.
_audio_input = gr.Audio(type="filepath")

# Output components: the reply as text, and the same reply as audio.
_response_outputs = [
    gr.Textbox(label="Response Text"),
    gr.Audio(label="Response Audio"),
]

# Wire the callback to the components; live=True is passed through unchanged.
iface = gr.Interface(
    fn=process_audio,
    inputs=_audio_input,
    outputs=_response_outputs,
    live=True,
)

iface.launch()