sal-maq committed on
Commit
2bcdf1f
·
verified ·
1 Parent(s): e7619ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -70
app.py CHANGED
@@ -1,81 +1,63 @@
1
  import os
2
- import tempfile
3
- import numpy as np
4
  import gradio as gr
5
  import whisper
6
  from gtts import gTTS
 
7
  from groq import Groq
8
- import soundfile as sf
9
 
10
- # Set up Groq API key
11
- os.environ['GROQ_API_KEY'] = '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'
12
- groq_client = Groq(api_key=os.environ.get('GROQ_API_KEY'))
13
 
14
- # Load Whisper model
15
- whisper_model = whisper.load_model("base")
16
 
17
- def process_audio(audio_file_path):
18
  try:
19
- # Ensure audio_file_path is valid
20
- if not audio_file_path:
21
- raise ValueError("No audio file provided")
22
-
23
- print(f"Received audio file path: {audio_file_path}")
24
-
25
- # Read the audio file from the file path
26
- with open(audio_file_path, 'rb') as f:
27
- audio_data = f.read()
28
 
29
- # Save the audio data to a temporary file
30
- with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio_file:
31
- temp_audio_path = temp_audio_file.name
32
- temp_audio_file.write(audio_data)
33
-
34
- # Ensure the temporary file is properly closed before processing
35
- temp_audio_file.close()
36
 
37
- # Transcribe audio using Whisper
38
- result = whisper_model.transcribe(temp_audio_path)
39
- user_text = result['text']
40
- print(f"Transcribed text: {user_text}")
41
-
42
- # Generate response using Llama 8b model with Groq API
43
- chat_completion = groq_client.chat.completions.create(
44
- messages=[
45
- {
46
- "role": "user",
47
- "content": user_text,
48
- }
49
- ],
50
- model="llama3-8b-8192",
51
  )
52
- response_text = chat_completion.choices[0].message.content
53
- print(f"Response text: {response_text}")
54
-
55
- # Convert response text to speech using gTTS
56
- tts = gTTS(text=response_text, lang='en')
57
- with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_audio_file:
58
- response_audio_path = temp_audio_file.name
59
- tts.save(response_audio_path)
60
 
61
- # Ensure the temporary file is properly closed before returning the path
62
- temp_audio_file.close()
63
-
64
- return response_text, response_audio_path
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  except Exception as e:
66
- return f"Error: {str(e)}", None
67
 
68
- # Create Gradio interface with updated layout
69
  with gr.Blocks() as demo:
70
  gr.Markdown(
71
  """
72
  <style>
73
  .gradio-container {
74
  font-family: Arial, sans-serif;
75
- background-color: #e0f7fa; /* Changed background color */
76
  border-radius: 10px;
77
  padding: 20px;
78
  box-shadow: 0 4px 12px rgba(0,0,0,0.2);
 
79
  }
80
  .gradio-input, .gradio-output {
81
  border-radius: 6px;
@@ -83,43 +65,43 @@ with gr.Blocks() as demo:
83
  padding: 10px;
84
  }
85
  .gradio-button {
86
- background-color: #28a745;
87
  color: white;
88
  border-radius: 6px;
89
  border: none;
90
- padding: 8px 16px; /* Adjusted padding */
91
  font-size: 16px; /* Adjusted font size */
 
92
  }
93
  .gradio-button:hover {
94
- background-color: #218838;
95
  }
96
  .gradio-title {
97
- font-size: 24px;
98
  font-weight: bold;
99
  margin-bottom: 20px;
 
100
  }
101
  .gradio-description {
102
- font-size: 14px;
103
  margin-bottom: 20px;
104
- color: #555;
105
  }
106
  </style>
107
  """
108
  )
109
-
110
- gr.Markdown("# Voice-to-Voice Chatbot\nDeveloped by Salman Maqbool")
111
  gr.Markdown("Upload an audio file to interact with the voice-to-voice chatbot. The chatbot will transcribe the audio, generate a response, and provide a spoken reply.")
112
-
113
  with gr.Row():
114
  with gr.Column():
115
- audio_input = gr.Audio(type="filepath", label="Upload Audio File")
116
- submit_button = gr.Button("Submit")
117
 
118
  with gr.Column():
119
- response_text = gr.Textbox(label="Response Text", placeholder="Generated response will appear here")
120
- response_audio = gr.Audio(label="Response Audio", type="filepath")
121
-
122
- submit_button.click(process_audio, inputs=audio_input, outputs=[response_text, response_audio])
123
 
124
  # Launch the Gradio app
125
- demo.launch()
 
1
  import os
 
 
2
  import gradio as gr
3
  import whisper
4
  from gtts import gTTS
5
+ import io
6
  from groq import Groq
 
7
 
8
+ # Initialize the Groq client
9
+ client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
 
10
 
11
+ # Load the Whisper model
12
+ model = whisper.load_model("base")
13
 
14
+ def process_audio(file_path):
15
  try:
16
+ # Load the audio file
17
+ audio = whisper.load_audio(file_path)
 
 
 
 
 
 
 
18
 
19
+ # Transcribe the audio using Whisper
20
+ result = model.transcribe(audio)
21
+ text = result["text"]
 
 
 
 
22
 
23
+ # Generate a response using Groq
24
+ chat_completion = client.chat.completions.create(
25
+ messages=[{"role": "user", "content": text}],
26
+ model="llama3-8b-8192", # Replace with the correct model if necessary
 
 
 
 
 
 
 
 
 
 
27
  )
 
 
 
 
 
 
 
 
28
 
29
+ # Access the response using dot notation
30
+ response_message = chat_completion.choices[0].message.content.strip()
31
+
32
+ # Convert the response text to speech
33
+ tts = gTTS(response_message)
34
+ response_audio_io = io.BytesIO()
35
+ tts.write_to_fp(response_audio_io) # Save the audio to the BytesIO object
36
+ response_audio_io.seek(0)
37
+
38
+ # Save audio to a file to ensure it's generated correctly
39
+ response_audio_path = "response.mp3"
40
+ with open(response_audio_path, "wb") as audio_file:
41
+ audio_file.write(response_audio_io.getvalue())
42
+
43
+ # Return the response text and the path to the saved audio file
44
+ return response_message, response_audio_path
45
+
46
  except Exception as e:
47
+ return f"An error occurred: {e}", None
48
 
49
+ # Create the Gradio interface with customized UI
50
  with gr.Blocks() as demo:
51
  gr.Markdown(
52
  """
53
  <style>
54
  .gradio-container {
55
  font-family: Arial, sans-serif;
56
+ background-color: #f0f4c3; /* Light green background color */
57
  border-radius: 10px;
58
  padding: 20px;
59
  box-shadow: 0 4px 12px rgba(0,0,0,0.2);
60
+ text-align: center;
61
  }
62
  .gradio-input, .gradio-output {
63
  border-radius: 6px;
 
65
  padding: 10px;
66
  }
67
  .gradio-button {
68
+ background-color: #ff7043;
69
  color: white;
70
  border-radius: 6px;
71
  border: none;
72
+ padding: 10px 20px; /* Adjusted padding */
73
  font-size: 16px; /* Adjusted font size */
74
+ cursor: pointer;
75
  }
76
  .gradio-button:hover {
77
+ background-color: #e64a19;
78
  }
79
  .gradio-title {
80
+ font-size: 28px;
81
  font-weight: bold;
82
  margin-bottom: 20px;
83
+ color: #37474f;
84
  }
85
  .gradio-description {
86
+ font-size: 16px;
87
  margin-bottom: 20px;
88
+ color: #616161;
89
  }
90
  </style>
91
  """
92
  )
93
+
94
+ gr.Markdown("# Voice-to-Voice Chatbot\nDeveloped by Salman Maqbool ❤️")
95
  gr.Markdown("Upload an audio file to interact with the voice-to-voice chatbot. The chatbot will transcribe the audio, generate a response, and provide a spoken reply.")
96
+
97
  with gr.Row():
98
  with gr.Column():
99
+ gr.Audio(type="filepath", label="Upload Audio File")
100
+ gr.Button("Submit")
101
 
102
  with gr.Column():
103
+ gr.Textbox(label="Response Text", placeholder="The AI-generated response will appear here", lines=5)
104
+ gr.Audio(label="Response Audio", type="filepath")
 
 
105
 
106
  # Launch the Gradio app
107
+ demo.launch()