kamal45 committed
Commit f30327c · verified · 1 Parent(s): a271f27

Create app.py

Files changed (1):
  1. app.py +84 -0
app.py ADDED
@@ -0,0 +1,84 @@
+ # Install required libraries before running:
+ #   pip install gradio torch gtts openai-whisper groq
+
+ import os
+ import torch
+ import whisper
+ from gtts import gTTS
+ import gradio as gr
+ from groq import Groq  # requires the groq package (added to the install line above)
+
+ # Load the Whisper model (GPU if available)
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ model = whisper.load_model("base", device=device)
+
+ # Read the Groq API key from the environment; export GROQ_API_KEY before
+ # launching instead of hardcoding the secret in source
+ client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+
+ # Function to transcribe audio using Whisper
+ def transcribe(audio_path):
+     try:
+         result = model.transcribe(audio_path)
+         return result["text"]
+     except Exception as e:
+         return f"Error during transcription: {e}"
+
+ # Function to get a response from Groq's LLM
+ def get_llm_response(text):
+     try:
+         chat_completion = client.chat.completions.create(
+             messages=[{"role": "user", "content": text}],
+             model="llama-3.3-70b-versatile",
+         )
+         return chat_completion.choices[0].message.content
+     except Exception as e:
+         return f"Error during LLM response generation: {e}"
+
+ # Function to convert text to speech
+ def text_to_speech(text):
+     try:
+         tts = gTTS(text, lang="en")
+         audio_path = "response.mp3"
+         tts.save(audio_path)
+         return audio_path
+     except Exception as e:
+         return f"Error during text-to-speech conversion: {e}"
+
+ # Combined function for processing audio input and generating audio output.
+ # Each helper returns an "Error ..." string on failure, so the result is
+ # checked before moving to the next stage.
+ def process_audio(audio_path):
+     transcription = transcribe(audio_path)
+     if "Error" in transcription:
+         return transcription, None, None
+
+     llm_response = get_llm_response(transcription)
+     if "Error" in llm_response:
+         return transcription, llm_response, None
+
+     audio_response = text_to_speech(llm_response)
+     if "Error" in audio_response:
+         return transcription, llm_response, audio_response
+
+     return transcription, llm_response, audio_response
+
+ # Build the Gradio interface
+ with gr.Blocks() as app:
+     gr.Markdown("## Real-Time Voice-to-Voice Chatbot")
+     with gr.Row():
+         with gr.Column():
+             audio_input = gr.Audio(type="filepath", label="Speak", interactive=True)
+         with gr.Column():
+             transcription_output = gr.Textbox(label="Transcription (Text)", lines=2)
+             response_output = gr.Textbox(label="Response (LLM Text)", lines=2)
+             audio_output = gr.Audio(label="Response (Audio)")
+     submit_button = gr.Button("Submit")
+
+     # Connect the input and output components
+     submit_button.click(
+         process_audio,
+         inputs=[audio_input],
+         outputs=[transcription_output, response_output, audio_output],
+     )
+
+ # Launch the app; the __main__ guard lets the module be imported
+ # (e.g. for a quick test) without starting the server
+ if __name__ == "__main__":
+     app.launch()
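For a quick end-to-end check of the pipeline without the browser UI, a minimal sketch follows. It assumes the dependencies are installed, GROQ_API_KEY is exported, and a short local recording exists; smoke_test.py and sample.wav are illustrative names, not part of this commit. The __main__ guard above keeps the import from starting the Gradio server.

# smoke_test.py -- hypothetical helper, not part of this commit
from app import process_audio  # importing app loads the Whisper model and Groq client

transcription, reply, audio_path = process_audio("sample.wav")  # illustrative input file
print("Transcription:", transcription)
print("LLM reply:", reply)
print("Audio response saved to:", audio_path)

Because the helpers return "Error ..." strings instead of raising, a failed stage shows up in the printed output rather than as a traceback.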