Spaces:

anshharora
/

Luna_AI

Sleeping

App Files Community

anshharora committed on Jan 9

Commit

a9577f3

verified ·

1 Parent(s): 321910f

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -22

app.py CHANGED Viewed

@@ -9,6 +9,8 @@ import base64
 import speech_recognition as sr
 import tempfile
 import json
 try:
     import pyaudio
@@ -160,19 +162,22 @@ def handle_voice():
         audio_file = request.files['audio']
         conversation_id = request.form.get('conversation_id', str(uuid.uuid4()))
-        # Save the audio file temporarily with a .wav extension
-        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
-            audio_file.save(temp_audio.name)
-            # Use FFmpeg to convert the audio to the correct format
-            output_path = temp_audio.name + '_converted.wav'
-            os.system(f'ffmpeg -i {temp_audio.name} -acodec pcm_s16le -ac 1 -ar 16000 {output_path}')
             try:
                 # Use the converted file for speech recognition
                 with sr.AudioFile(output_path) as source:
-                    audio = recognizer.record(source)
-                    text = recognizer.recognize_google(audio)
                 if not text:
                     return jsonify({'error': 'Could not transcribe audio'}), 400
@@ -194,20 +199,12 @@ def handle_voice():
                 return jsonify(result)
-            finally:
-                # Clean up temporary files
-                try:
-                    os.remove(temp_audio.name)
-                    os.remove(output_path)
-                except:
-                    pass
-    except sr.UnknownValueError:
-        return jsonify({'error': 'Could not understand audio'}), 400
-    except sr.RequestError as e:
-        return jsonify({'error': f'Could not request results: {str(e)}'}), 400
     except Exception as e:
-        print(f"Error in speech_to_text: {str(e)}")
-        return jsonify({'error': str(e)}), 400
 if __name__ == '__main__':
     app.run(host='0.0.0.0', port=7860)

 import speech_recognition as sr
 import tempfile
 import json
+from pydub import AudioSegment
 try:
     import pyaudio
         audio_file = request.files['audio']
         conversation_id = request.form.get('conversation_id', str(uuid.uuid4()))
+        # Create a temporary directory to handle audio processing
+        with tempfile.TemporaryDirectory() as temp_dir:
+            # Save the incoming WebM file
+            input_path = os.path.join(temp_dir, 'input.webm')
+            audio_file.save(input_path)
             try:
+                # Convert WebM to WAV using pydub
+                audio = AudioSegment.from_file(input_path, format="webm")
+                output_path = os.path.join(temp_dir, 'output.wav')
+                audio.export(output_path, format="wav")
                 # Use the converted file for speech recognition
                 with sr.AudioFile(output_path) as source:
+                    audio_data = recognizer.record(source)
+                    text = recognizer.recognize_google(audio_data)
                 if not text:
                     return jsonify({'error': 'Could not transcribe audio'}), 400
                 return jsonify(result)
+            except Exception as e:
+                print(f"Error processing audio: {str(e)}")
+                return jsonify({'error': f'Error processing audio: {str(e)}'}), 400
     except Exception as e:
+        print(f"Error in handle_voice: {str(e)}")
+        return jsonify({'error': str(e)}), 400
 if __name__ == '__main__':
     app.run(host='0.0.0.0', port=7860)