anshharora committed on
Commit
7d18b6a
·
verified ·
1 Parent(s): c3df5d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -68
app.py CHANGED
@@ -1,15 +1,50 @@
1
- from flask import Flask, request, jsonify
2
- import speech_recognition as sr
 
 
3
  import io
4
- import os
5
  import tempfile
6
- from pydub import AudioSegment
7
- import logging
8
 
9
  # Set up logging
10
  logging.basicConfig(level=logging.DEBUG)
11
  logger = logging.getLogger(__name__)
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
 
15
  try:
@@ -166,75 +201,43 @@ def chat():
166
  def handle_voice():
167
  try:
168
  if 'audio' not in request.files:
169
- logger.error("No audio file in request")
170
  return jsonify({'error': 'No audio file provided'}), 400
171
 
172
  audio_file = request.files['audio']
173
  conversation_id = request.form.get('conversation_id', str(uuid.uuid4()))
174
 
175
- # Log incoming request details
176
- logger.debug(f"Received audio file: {audio_file.filename}, "
177
- f"Content type: {audio_file.content_type}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
- with tempfile.TemporaryDirectory() as temp_dir:
180
- # Save incoming audio
181
- input_path = os.path.join(temp_dir, 'input.webm')
182
- audio_file.save(input_path)
183
- logger.debug(f"Saved audio file to: {input_path}")
184
-
185
- try:
186
- # Convert audio using pydub
187
- audio = AudioSegment.from_file(input_path)
188
- output_path = os.path.join(temp_dir, 'output.wav')
189
- audio.export(output_path, format="wav",
190
- parameters=["-ac", "1", "-ar", "16000"])
191
- logger.debug("Audio conversion successful")
192
-
193
- # Initialize recognition if not already done
194
- if not hasattr(app, 'recognizer'):
195
- app.recognizer = init_speech_recognition()
196
-
197
- if not app.recognizer:
198
- return jsonify({'error': 'Speech recognition unavailable'}), 503
199
-
200
- # Perform speech recognition
201
- with sr.AudioFile(output_path) as source:
202
- audio_data = app.recognizer.record(source)
203
- text = app.recognizer.recognize_google(audio_data)
204
- logger.debug(f"Speech recognition result: {text}")
205
-
206
- if not text:
207
- return jsonify({'error': 'Could not transcribe audio'}), 400
208
-
209
- # Get chatbot response
210
- response = chat_with_groq(text, conversation_id)
211
-
212
- # Generate voice response
213
- audio_io = text_to_speech(response)
214
- result = {
215
- 'text': text,
216
- 'response': response,
217
- 'conversation_id': conversation_id
218
- }
219
-
220
- if audio_io:
221
- audio_base64 = base64.b64encode(audio_io.getvalue()).decode('utf-8')
222
- result['voice_response'] = audio_base64
223
-
224
- return jsonify(result)
225
-
226
- except sr.UnknownValueError:
227
- logger.error("Speech recognition could not understand audio")
228
- return jsonify({'error': 'Could not understand audio'}), 400
229
- except sr.RequestError as e:
230
- logger.error(f"Speech recognition service error: {e}")
231
- return jsonify({'error': 'Speech recognition service error'}), 503
232
- except Exception as e:
233
- logger.error(f"Audio processing error: {e}")
234
- return jsonify({'error': f'Error processing audio: {str(e)}'}), 400
235
-
236
  except Exception as e:
237
- logger.error(f"General error in handle_voice: {e}")
238
- return jsonify({'error': str(e)}), 500
239
  if __name__ == '__main__':
240
  app.run(host='0.0.0.0', port=7860)
 
1
+ import sounddevice as sd
2
+ import scipy.io.wavfile as wav
3
+ import numpy as np
4
+ from pydub import AudioSegment
5
  import io
 
6
  import tempfile
7
+ import os
8
+
9
 
10
  # Set up logging
11
  logging.basicConfig(level=logging.DEBUG)
12
  logger = logging.getLogger(__name__)
13
 
14
class AudioProcessor:
    """Convert uploaded audio to WAV and record audio from a local device.

    Both operations use the sample rate and channel count stored on the
    instance.
    """

    def __init__(self, sample_rate=16000, channels=1):
        """Store the target audio format.

        Args:
            sample_rate: Target frame rate in Hz (defaults to 16000).
            channels: Target number of channels (defaults to 1).
        """
        self.sample_rate = sample_rate
        self.channels = channels

    def process_audio(self, audio_file):
        """Convert an uploaded audio file to a WAV file and return its path.

        Args:
            audio_file: Object exposing ``save(path)``, e.g. the upload that
                ``handle_voice`` takes from ``request.files``.

        Returns:
            Path of the converted WAV file. The file is created outside the
            scratch directory so it still exists after this method returns;
            the caller is responsible for deleting it when finished.

        Raises:
            Exception: Whatever ``audio_file.save`` or pydub raise. The
                partially written output file is removed first.
        """
        # The output must outlive the scratch directory below: returning a
        # path inside TemporaryDirectory() would hand back a deleted file.
        fd, output_path = tempfile.mkstemp(suffix='.wav')
        os.close(fd)

        try:
            with tempfile.TemporaryDirectory() as temp_dir:
                # Save incoming audio; only the input lives in the scratch dir
                input_path = os.path.join(temp_dir, 'input.webm')
                audio_file.save(input_path)

                # Convert to WAV using pydub
                audio = AudioSegment.from_file(input_path)
                audio = audio.set_channels(self.channels)
                audio = audio.set_frame_rate(self.sample_rate)

                # export() hands back the open output file; close it so the
                # data is flushed and the path can be reopened or deleted.
                exported = audio.export(output_path, format='wav')
                exported.close()
        except Exception:
            # Do not leave an empty or partial WAV behind on failure.
            try:
                os.remove(output_path)
            except OSError:
                pass
            raise

        return output_path

    def record_audio(self, duration=5):
        """Record audio using sounddevice and return the recording.

        Args:
            duration: Length of the recording in seconds.

        Returns:
            The object returned by ``sd.rec`` once ``sd.wait()`` has
            returned (presumably a NumPy array of samples - confirm against
            the sounddevice documentation).
        """
        recording = sd.rec(
            int(duration * self.sample_rate),
            samplerate=self.sample_rate,
            channels=self.channels
        )
        # sd.rec() returns immediately; block until the capture is complete.
        sd.wait()
        return recording
46
+
47
+
48
 
49
 
50
  try:
 
201
  def handle_voice():
202
  try:
203
  if 'audio' not in request.files:
 
204
  return jsonify({'error': 'No audio file provided'}), 400
205
 
206
  audio_file = request.files['audio']
207
  conversation_id = request.form.get('conversation_id', str(uuid.uuid4()))
208
 
209
+ # Process audio
210
+ audio_processor = AudioProcessor()
211
+ wav_path = audio_processor.process_audio(audio_file)
212
+
213
+ # Perform speech recognition
214
+ recognizer = sr.Recognizer()
215
+ with sr.AudioFile(wav_path) as source:
216
+ audio_data = recognizer.record(source)
217
+ text = recognizer.recognize_google(audio_data)
218
+
219
+ if not text:
220
+ return jsonify({'error': 'Could not transcribe audio'}), 400
221
+
222
+ # Get chatbot response
223
+ response = chat_with_groq(text, conversation_id)
224
+
225
+ # Generate voice response
226
+ audio_io = text_to_speech(response)
227
+ result = {
228
+ 'text': text,
229
+ 'response': response,
230
+ 'conversation_id': conversation_id
231
+ }
232
+
233
+ if audio_io:
234
+ audio_base64 = base64.b64encode(audio_io.getvalue()).decode('utf-8')
235
+ result['voice_response'] = audio_base64
236
+
237
+ return jsonify(result)
238
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
  except Exception as e:
240
+ print(f"Error in handle_voice: {str(e)}")
241
+ return jsonify({'error': str(e)}), 400
242
  if __name__ == '__main__':
243
  app.run(host='0.0.0.0', port=7860)