anshharora committed on
Commit
a9577f3
·
verified ·
1 Parent(s): 321910f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -22
app.py CHANGED
@@ -9,6 +9,8 @@ import base64
9
  import speech_recognition as sr
10
  import tempfile
11
  import json
 
 
12
 
13
  try:
14
  import pyaudio
@@ -160,19 +162,22 @@ def handle_voice():
160
  audio_file = request.files['audio']
161
  conversation_id = request.form.get('conversation_id', str(uuid.uuid4()))
162
 
163
- # Save the audio file temporarily with a .wav extension
164
- with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
165
- audio_file.save(temp_audio.name)
166
-
167
- # Use FFmpeg to convert the audio to the correct format
168
- output_path = temp_audio.name + '_converted.wav'
169
- os.system(f'ffmpeg -i {temp_audio.name} -acodec pcm_s16le -ac 1 -ar 16000 {output_path}')
170
 
171
  try:
 
 
 
 
 
172
  # Use the converted file for speech recognition
173
  with sr.AudioFile(output_path) as source:
174
- audio = recognizer.record(source)
175
- text = recognizer.recognize_google(audio)
176
 
177
  if not text:
178
  return jsonify({'error': 'Could not transcribe audio'}), 400
@@ -194,20 +199,12 @@ def handle_voice():
194
 
195
  return jsonify(result)
196
 
197
- finally:
198
- # Clean up temporary files
199
- try:
200
- os.remove(temp_audio.name)
201
- os.remove(output_path)
202
- except:
203
- pass
204
 
205
- except sr.UnknownValueError:
206
- return jsonify({'error': 'Could not understand audio'}), 400
207
- except sr.RequestError as e:
208
- return jsonify({'error': f'Could not request results: {str(e)}'}), 400
209
  except Exception as e:
210
- print(f"Error in speech_to_text: {str(e)}")
211
- return jsonify({'error': str(e)}), 400
212
  if __name__ == '__main__':
213
  app.run(host='0.0.0.0', port=7860)
 
9
  import speech_recognition as sr
10
  import tempfile
11
  import json
12
+ from pydub import AudioSegment
13
+
14
 
15
  try:
16
  import pyaudio
 
162
  audio_file = request.files['audio']
163
  conversation_id = request.form.get('conversation_id', str(uuid.uuid4()))
164
 
165
+ # Create a temporary directory to handle audio processing
166
+ with tempfile.TemporaryDirectory() as temp_dir:
167
+ # Save the incoming WebM file
168
+ input_path = os.path.join(temp_dir, 'input.webm')
169
+ audio_file.save(input_path)
 
 
170
 
171
  try:
172
+ # Convert WebM to WAV using pydub
173
+ audio = AudioSegment.from_file(input_path, format="webm")
174
+ output_path = os.path.join(temp_dir, 'output.wav')
175
+ audio.export(output_path, format="wav")
176
+
177
  # Use the converted file for speech recognition
178
  with sr.AudioFile(output_path) as source:
179
+ audio_data = recognizer.record(source)
180
+ text = recognizer.recognize_google(audio_data)
181
 
182
  if not text:
183
  return jsonify({'error': 'Could not transcribe audio'}), 400
 
199
 
200
  return jsonify(result)
201
 
202
+ except Exception as e:
203
+ print(f"Error processing audio: {str(e)}")
204
+ return jsonify({'error': f'Error processing audio: {str(e)}'}), 400
 
 
 
 
205
 
 
 
 
 
206
  except Exception as e:
207
+ print(f"Error in handle_voice: {str(e)}")
208
+ return jsonify({'error': str(e)}), 400
209
  if __name__ == '__main__':
210
  app.run(host='0.0.0.0', port=7860)