anshharora committed on
Commit
b19a42f
·
verified ·
1 Parent(s): a246fe4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -76
app.py CHANGED
@@ -1,45 +1,19 @@
1
  from flask import Flask, request, jsonify, render_template
 
 
2
  import os
3
  import uuid
4
- import base64
5
- import logging
6
- from dotenv import load_dotenv
7
- import io
8
- import tempfile
9
  from gtts import gTTS
10
- from groq import Groq
 
11
  import speech_recognition as sr
12
- from pydub import AudioSegment
 
13
 
14
- # Set up logging
15
- logging.basicConfig(level=logging.DEBUG)
16
- logger = logging.getLogger(__name__)
17
-
18
- class AudioProcessor:
19
- def __init__(self):
20
- self.sample_rate = 16000
21
- self.channels = 1
22
-
23
- def process_audio(self, audio_file):
24
- """Process incoming audio file and convert to proper format"""
25
- try:
26
- with tempfile.TemporaryDirectory() as temp_dir:
27
- # Save incoming audio
28
- input_path = os.path.join(temp_dir, 'input.webm')
29
- audio_file.save(input_path)
30
-
31
- # Convert to WAV using pydub
32
- audio = AudioSegment.from_file(input_path)
33
- audio = audio.set_channels(self.channels)
34
- audio = audio.set_frame_rate(self.sample_rate)
35
-
36
- output_path = os.path.join(temp_dir, 'output.wav')
37
- audio.export(output_path, format='wav')
38
-
39
- return output_path
40
- except Exception as e:
41
- logger.error(f"Error processing audio: {e}")
42
- raise
43
 
44
  # Initialize Flask app
45
  app = Flask(__name__, static_folder='static')
@@ -63,7 +37,7 @@ def load_base_prompt():
63
  with open("base_prompt.txt", "r") as file:
64
  return file.read().strip()
65
  except FileNotFoundError:
66
- logger.warning("base_prompt.txt not found, using default prompt")
67
  return "You are a helpful assistant for language learning."
68
 
69
  # Load the base prompt
@@ -97,7 +71,7 @@ def chat_with_groq(user_message, conversation_id=None):
97
 
98
  return assistant_message
99
  except Exception as e:
100
- logger.error(f"Error in chat_with_groq: {e}")
101
  return f"I apologize, but I'm having trouble responding right now. Error: {str(e)}"
102
 
103
  def text_to_speech(text):
@@ -108,12 +82,17 @@ def text_to_speech(text):
108
  audio_io.seek(0)
109
  return audio_io
110
  except Exception as e:
111
- logger.error(f"Error in text_to_speech: {e}")
112
  return None
113
 
114
- def speech_to_text(audio_path):
115
  try:
116
- with sr.AudioFile(audio_path) as source:
 
 
 
 
 
117
  # Adjust recognition settings
118
  recognizer.dynamic_energy_threshold = True
119
  recognizer.energy_threshold = 4000
@@ -121,18 +100,23 @@ def speech_to_text(audio_path):
121
  # Record the entire audio file
122
  audio = recognizer.record(source)
123
 
124
- # Perform recognition
125
  text = recognizer.recognize_google(audio, language='en-US')
126
  return text
127
 
128
  except sr.UnknownValueError:
129
  return "Could not understand audio"
130
  except sr.RequestError as e:
131
- logger.error(f"Speech recognition request error: {e}")
132
  return f"Could not request results; {str(e)}"
133
  except Exception as e:
134
- logger.error(f"Error in speech_to_text: {e}")
135
  return None
 
 
 
 
 
 
136
 
137
  @app.route('/')
138
  def index():
@@ -165,7 +149,6 @@ def chat():
165
  return jsonify(result)
166
 
167
  except Exception as e:
168
- logger.error(f"Error in chat endpoint: {e}")
169
  return jsonify({'error': str(e)}), 500
170
 
171
  @app.route('/api/voice', methods=['POST'])
@@ -177,36 +160,54 @@ def handle_voice():
177
  audio_file = request.files['audio']
178
  conversation_id = request.form.get('conversation_id', str(uuid.uuid4()))
179
 
180
- # Process audio
181
- audio_processor = AudioProcessor()
182
- wav_path = audio_processor.process_audio(audio_file)
183
-
184
- # Perform speech recognition
185
- text = speech_to_text(wav_path)
186
-
187
- if not text:
188
- return jsonify({'error': 'Could not transcribe audio'}), 400
189
-
190
- # Get chatbot response
191
- response = chat_with_groq(text, conversation_id)
192
-
193
- # Generate voice response
194
- audio_io = text_to_speech(response)
195
- result = {
196
- 'text': text,
197
- 'response': response,
198
- 'conversation_id': conversation_id
199
- }
200
-
201
- if audio_io:
202
- audio_base64 = base64.b64encode(audio_io.getvalue()).decode('utf-8')
203
- result['voice_response'] = audio_base64
204
-
205
- return jsonify(result)
206
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
  except Exception as e:
208
- logger.error(f"Error in handle_voice: {e}")
209
- return jsonify({'error': str(e)}), 400
210
-
211
  if __name__ == '__main__':
212
- app.run(host='0.0.0.0', port=7860)
 
1
  from flask import Flask, request, jsonify, render_template
2
+ from dotenv import load_dotenv
3
+ from groq import Groq
4
  import os
5
  import uuid
 
 
 
 
 
6
  from gtts import gTTS
7
+ import io
8
+ import base64
9
  import speech_recognition as sr
10
+ import tempfile
11
+ import json
12
 
13
+ try:
14
+ import pyaudio
15
+ except ImportError:
16
+ print("Warning: PyAudio not available, speech functionality will be limited")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  # Initialize Flask app
19
  app = Flask(__name__, static_folder='static')
 
37
  with open("base_prompt.txt", "r") as file:
38
  return file.read().strip()
39
  except FileNotFoundError:
40
+ print("Error: base_prompt.txt file not found.")
41
  return "You are a helpful assistant for language learning."
42
 
43
  # Load the base prompt
 
71
 
72
  return assistant_message
73
  except Exception as e:
74
+ print(f"Error in chat_with_groq: {str(e)}")
75
  return f"I apologize, but I'm having trouble responding right now. Error: {str(e)}"
76
 
77
  def text_to_speech(text):
 
82
  audio_io.seek(0)
83
  return audio_io
84
  except Exception as e:
85
+ print(f"Error in text_to_speech: {str(e)}")
86
  return None
87
 
88
+ def speech_to_text(audio_file):
89
  try:
90
+ # Save the uploaded audio to a temporary file
91
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
92
+ audio_file.save(temp_audio.name)
93
+
94
+ # Use SpeechRecognition to convert speech to text
95
+ with sr.AudioFile(temp_audio.name) as source:
96
  # Adjust recognition settings
97
  recognizer.dynamic_energy_threshold = True
98
  recognizer.energy_threshold = 4000
 
100
  # Record the entire audio file
101
  audio = recognizer.record(source)
102
 
103
+ # Perform recognition with increased timeout
104
  text = recognizer.recognize_google(audio, language='en-US')
105
  return text
106
 
107
  except sr.UnknownValueError:
108
  return "Could not understand audio"
109
  except sr.RequestError as e:
 
110
  return f"Could not request results; {str(e)}"
111
  except Exception as e:
112
+ print(f"Error in speech_to_text: {str(e)}")
113
  return None
114
+ finally:
115
+ # Clean up temporary file
116
+ try:
117
+ os.unlink(temp_audio.name)
118
+ except:
119
+ pass
120
 
121
  @app.route('/')
122
  def index():
 
149
  return jsonify(result)
150
 
151
  except Exception as e:
 
152
  return jsonify({'error': str(e)}), 500
153
 
154
  @app.route('/api/voice', methods=['POST'])
 
160
  audio_file = request.files['audio']
161
  conversation_id = request.form.get('conversation_id', str(uuid.uuid4()))
162
 
163
+ # Save the audio file temporarily with a .wav extension
164
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
165
+ audio_file.save(temp_audio.name)
166
+
167
+ # Use FFmpeg to convert the audio to the correct format
168
+ output_path = temp_audio.name + '_converted.wav'
169
+ os.system(f'ffmpeg -i {temp_audio.name} -acodec pcm_s16le -ac 1 -ar 16000 {output_path}')
170
+
171
+ try:
172
+ # Use the converted file for speech recognition
173
+ with sr.AudioFile(output_path) as source:
174
+ audio = recognizer.record(source)
175
+ text = recognizer.recognize_google(audio)
176
+
177
+ if not text:
178
+ return jsonify({'error': 'Could not transcribe audio'}), 400
179
+
180
+ # Get response from Groq
181
+ response = chat_with_groq(text, conversation_id)
182
+
183
+ # Generate voice response
184
+ audio_io = text_to_speech(response)
185
+ result = {
186
+ 'text': text,
187
+ 'response': response,
188
+ 'conversation_id': conversation_id
189
+ }
190
+
191
+ if audio_io:
192
+ audio_base64 = base64.b64encode(audio_io.getvalue()).decode('utf-8')
193
+ result['voice_response'] = audio_base64
194
+
195
+ return jsonify(result)
196
+
197
+ finally:
198
+ # Clean up temporary files
199
+ try:
200
+ os.remove(temp_audio.name)
201
+ os.remove(output_path)
202
+ except:
203
+ pass
204
+
205
+ except sr.UnknownValueError:
206
+ return jsonify({'error': 'Could not understand audio'}), 400
207
+ except sr.RequestError as e:
208
+ return jsonify({'error': f'Could not request results: {str(e)}'}), 400
209
  except Exception as e:
210
+ print(f"Error in speech_to_text: {str(e)}")
211
+ return jsonify({'error': str(e)}), 400
 
212
  if __name__ == '__main__':
213
+ app.run(host='0.0.0.0', port=7860)