anshharora committed on
Commit
d4c64bc
·
verified ·
1 Parent(s): b0dba38

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -21
app.py CHANGED
@@ -1,15 +1,15 @@
1
- from flask import Flask, request, jsonify, render_template
2
- from dotenv import load_dotenv
3
- from groq import Groq
4
- import os
5
- import uuid
6
- from gtts import gTTS
7
- import io
8
- import base64
9
  import speech_recognition as sr
 
 
10
  import tempfile
11
- import json
12
  from pydub import AudioSegment
 
 
 
 
 
 
13
 
14
 
15
  try:
@@ -31,6 +31,15 @@ MODEL = "llama3-70b-8192"
31
  # Initialize speech recognition
32
  recognizer = sr.Recognizer()
33
 
 
 
 
 
 
 
 
 
 
34
  # Store conversation history
35
  conversations = {}
36
 
@@ -157,32 +166,47 @@ def chat():
157
  def handle_voice():
158
  try:
159
  if 'audio' not in request.files:
 
160
  return jsonify({'error': 'No audio file provided'}), 400
161
 
162
  audio_file = request.files['audio']
163
  conversation_id = request.form.get('conversation_id', str(uuid.uuid4()))
164
 
165
- # Create a temporary directory to handle audio processing
 
 
 
166
  with tempfile.TemporaryDirectory() as temp_dir:
167
- # Save the incoming WebM file
168
  input_path = os.path.join(temp_dir, 'input.webm')
169
  audio_file.save(input_path)
 
170
 
171
  try:
172
- # Convert WebM to WAV using pydub
173
- audio = AudioSegment.from_file(input_path, format="webm")
174
  output_path = os.path.join(temp_dir, 'output.wav')
175
- audio.export(output_path, format="wav")
 
 
176
 
177
- # Use the converted file for speech recognition
 
 
 
 
 
 
 
178
  with sr.AudioFile(output_path) as source:
179
- audio_data = recognizer.record(source)
180
- text = recognizer.recognize_google(audio_data)
 
181
 
182
  if not text:
183
  return jsonify({'error': 'Could not transcribe audio'}), 400
184
 
185
- # Get response from Groq
186
  response = chat_with_groq(text, conversation_id)
187
 
188
  # Generate voice response
@@ -199,12 +223,18 @@ def handle_voice():
199
 
200
  return jsonify(result)
201
 
 
 
 
 
 
 
202
  except Exception as e:
203
- print(f"Error processing audio: {str(e)}")
204
  return jsonify({'error': f'Error processing audio: {str(e)}'}), 400
205
 
206
  except Exception as e:
207
- print(f"Error in handle_voice: {str(e)}")
208
- return jsonify({'error': str(e)}), 400
209
  if __name__ == '__main__':
210
  app.run(host='0.0.0.0', port=7860)
 
1
+ from flask import Flask, request, jsonify
 
 
 
 
 
 
 
2
  import speech_recognition as sr
3
+ import io
4
+ import os
5
  import tempfile
 
6
  from pydub import AudioSegment
7
+ import logging
8
+
9
+ # Set up logging
10
+ logging.basicConfig(level=logging.DEBUG)
11
+ logger = logging.getLogger(__name__)
12
+
13
 
14
 
15
  try:
 
31
  # Initialize speech recognition
32
  recognizer = sr.Recognizer()
33
 
34
+ def init_speech_recognition():
35
+ """Initialize speech recognition with fallback options"""
36
+ try:
37
+ recognizer = sr.Recognizer()
38
+ return recognizer
39
+ except Exception as e:
40
+ logger.error(f"Failed to initialize speech recognition: {e}")
41
+ return None
42
+
43
  # Store conversation history
44
  conversations = {}
45
 
 
166
  def handle_voice():
167
  try:
168
  if 'audio' not in request.files:
169
+ logger.error("No audio file in request")
170
  return jsonify({'error': 'No audio file provided'}), 400
171
 
172
  audio_file = request.files['audio']
173
  conversation_id = request.form.get('conversation_id', str(uuid.uuid4()))
174
 
175
+ # Log incoming request details
176
+ logger.debug(f"Received audio file: {audio_file.filename}, "
177
+ f"Content type: {audio_file.content_type}")
178
+
179
  with tempfile.TemporaryDirectory() as temp_dir:
180
+ # Save incoming audio
181
  input_path = os.path.join(temp_dir, 'input.webm')
182
  audio_file.save(input_path)
183
+ logger.debug(f"Saved audio file to: {input_path}")
184
 
185
  try:
186
+ # Convert audio using pydub
187
+ audio = AudioSegment.from_file(input_path)
188
  output_path = os.path.join(temp_dir, 'output.wav')
189
+ audio.export(output_path, format="wav",
190
+ parameters=["-ac", "1", "-ar", "16000"])
191
+ logger.debug("Audio conversion successful")
192
 
193
+ # Initialize recognition if not already done
194
+ if not hasattr(app, 'recognizer'):
195
+ app.recognizer = init_speech_recognition()
196
+
197
+ if not app.recognizer:
198
+ return jsonify({'error': 'Speech recognition unavailable'}), 503
199
+
200
+ # Perform speech recognition
201
  with sr.AudioFile(output_path) as source:
202
+ audio_data = app.recognizer.record(source)
203
+ text = app.recognizer.recognize_google(audio_data)
204
+ logger.debug(f"Speech recognition result: {text}")
205
 
206
  if not text:
207
  return jsonify({'error': 'Could not transcribe audio'}), 400
208
 
209
+ # Get chatbot response
210
  response = chat_with_groq(text, conversation_id)
211
 
212
  # Generate voice response
 
223
 
224
  return jsonify(result)
225
 
226
+ except sr.UnknownValueError:
227
+ logger.error("Speech recognition could not understand audio")
228
+ return jsonify({'error': 'Could not understand audio'}), 400
229
+ except sr.RequestError as e:
230
+ logger.error(f"Speech recognition service error: {e}")
231
+ return jsonify({'error': 'Speech recognition service error'}), 503
232
  except Exception as e:
233
+ logger.error(f"Audio processing error: {e}")
234
  return jsonify({'error': f'Error processing audio: {str(e)}'}), 400
235
 
236
  except Exception as e:
237
+ logger.error(f"General error in handle_voice: {e}")
238
+ return jsonify({'error': str(e)}), 500
239
  if __name__ == '__main__':
240
  app.run(host='0.0.0.0', port=7860)