Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,15 +1,50 @@
|
|
1 |
-
|
2 |
-
import
|
|
|
|
|
3 |
import io
|
4 |
-
import os
|
5 |
import tempfile
|
6 |
-
|
7 |
-
|
8 |
|
9 |
# Set up logging
|
10 |
logging.basicConfig(level=logging.DEBUG)
|
11 |
logger = logging.getLogger(__name__)
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
|
15 |
try:
|
@@ -166,75 +201,43 @@ def chat():
|
|
166 |
def handle_voice():
    """Handle an uploaded voice message: transcribe it, get a chatbot reply,
    and return the reply as text plus base64-encoded TTS audio.

    Expects a multipart request with an 'audio' file and an optional
    'conversation_id' form field (a fresh UUID is generated if absent).
    """
    try:
        if 'audio' not in request.files:
            logger.error("No audio file in request")
            return jsonify({'error': 'No audio file provided'}), 400

        audio_file = request.files['audio']
        conversation_id = request.form.get('conversation_id', str(uuid.uuid4()))

        with tempfile.TemporaryDirectory() as temp_dir:
            # Save incoming audio
            input_path = os.path.join(temp_dir, 'input.webm')
            audio_file.save(input_path)
            logger.debug(f"Saved audio file to: {input_path}")

            try:
                # Convert audio using pydub to mono 16 kHz WAV for recognition
                audio = AudioSegment.from_file(input_path)
                output_path = os.path.join(temp_dir, 'output.wav')
                audio.export(output_path, format="wav",
                             parameters=["-ac", "1", "-ar", "16000"])
                logger.debug("Audio conversion successful")

                # Initialize recognition if not already done (cached on the app)
                if not hasattr(app, 'recognizer'):
                    app.recognizer = init_speech_recognition()

                if not app.recognizer:
                    return jsonify({'error': 'Speech recognition unavailable'}), 503

                # Perform speech recognition
                with sr.AudioFile(output_path) as source:
                    audio_data = app.recognizer.record(source)
                    text = app.recognizer.recognize_google(audio_data)
                    logger.debug(f"Speech recognition result: {text}")

                if not text:
                    return jsonify({'error': 'Could not transcribe audio'}), 400

                # Get chatbot response
                response = chat_with_groq(text, conversation_id)

                # Generate voice response
                audio_io = text_to_speech(response)
                result = {
                    'text': text,
                    'response': response,
                    'conversation_id': conversation_id
                }

                if audio_io:
                    audio_base64 = base64.b64encode(audio_io.getvalue()).decode('utf-8')
                    result['voice_response'] = audio_base64

                return jsonify(result)

            except sr.UnknownValueError:
                logger.error("Speech recognition could not understand audio")
                return jsonify({'error': 'Could not understand audio'}), 400
            except sr.RequestError as e:
                logger.error(f"Speech recognition service error: {e}")
                return jsonify({'error': 'Speech recognition service error'}), 503
            except Exception as e:
                logger.error(f"Audio processing error: {e}")
                return jsonify({'error': f'Error processing audio: {str(e)}'}), 400

    except Exception as e:
        # Log the failure and return a proper (body, status) pair.  The
        # original ended with `return jsonify(...),` — a trailing comma that
        # produced a 1-tuple with no HTTP status code.
        logger.error(f"Error in handle_voice: {e}")
        return jsonify({'error': str(e)}), 400
|
239 |
if __name__ == '__main__':
|
240 |
app.run(host='0.0.0.0', port=7860)
|
|
|
1 |
+
import sounddevice as sd
|
2 |
+
import scipy.io.wavfile as wav
|
3 |
+
import numpy as np
|
4 |
+
from pydub import AudioSegment
|
5 |
import io
|
|
|
6 |
import tempfile
|
7 |
+
import os
|
8 |
+
|
9 |
|
10 |
# Set up logging
|
11 |
logging.basicConfig(level=logging.DEBUG)
|
12 |
logger = logging.getLogger(__name__)
|
13 |
|
14 |
+
class AudioProcessor:
    """Convert uploaded audio into mono 16 kHz WAV and record from the mic."""

    def __init__(self):
        # Target format expected by the speech recognizer.
        self.sample_rate = 16000
        self.channels = 1

    def process_audio(self, audio_file):
        """Convert *audio_file* (an uploaded blob, e.g. webm) to mono 16 kHz WAV.

        Returns an ``io.BytesIO`` containing the WAV data, rewound to the
        start.  NOTE: the previous version returned a path located inside a
        ``TemporaryDirectory``; that directory is deleted the moment the
        ``with`` block exits, so the caller always received a dangling path.
        ``sr.AudioFile`` accepts file-like objects as well as filenames, so
        a BytesIO is a drop-in replacement that keeps the data alive.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            # Save incoming audio so pydub/ffmpeg can probe its container.
            input_path = os.path.join(temp_dir, 'input.webm')
            audio_file.save(input_path)

            # Convert to the recognizer's expected format using pydub.
            audio = AudioSegment.from_file(input_path)
            audio = audio.set_channels(self.channels)
            audio = audio.set_frame_rate(self.sample_rate)

            wav_io = io.BytesIO()
            audio.export(wav_io, format='wav')

        wav_io.seek(0)
        return wav_io

    def record_audio(self, duration=5):
        """Record *duration* seconds from the default input device (blocking).

        Returns the raw numpy array produced by sounddevice.
        """
        recording = sd.rec(
            int(duration * self.sample_rate),
            samplerate=self.sample_rate,
            channels=self.channels
        )
        sd.wait()  # block until the recording finishes
        return recording
|
46 |
+
|
47 |
+
|
48 |
|
49 |
|
50 |
try:
|
|
|
201 |
def handle_voice():
    """Handle an uploaded voice message: transcribe it, get a chatbot reply,
    and return the reply as text plus base64-encoded TTS audio.

    Expects a multipart request with an 'audio' file and an optional
    'conversation_id' form field (a fresh UUID is generated if absent).
    """
    try:
        if 'audio' not in request.files:
            logger.error("No audio file in request")
            return jsonify({'error': 'No audio file provided'}), 400

        audio_file = request.files['audio']
        conversation_id = request.form.get('conversation_id', str(uuid.uuid4()))

        # Process audio into the format the recognizer expects
        audio_processor = AudioProcessor()
        wav_path = audio_processor.process_audio(audio_file)

        # Perform speech recognition
        recognizer = sr.Recognizer()
        with sr.AudioFile(wav_path) as source:
            audio_data = recognizer.record(source)
            text = recognizer.recognize_google(audio_data)

        if not text:
            return jsonify({'error': 'Could not transcribe audio'}), 400

        # Get chatbot response
        response = chat_with_groq(text, conversation_id)

        # Generate voice response
        audio_io = text_to_speech(response)
        result = {
            'text': text,
            'response': response,
            'conversation_id': conversation_id
        }

        if audio_io:
            audio_base64 = base64.b64encode(audio_io.getvalue()).decode('utf-8')
            result['voice_response'] = audio_base64

        return jsonify(result)

    # Specific recognizer failures get precise status codes (these handlers
    # existed in an earlier revision and were lost in the rewrite).
    except sr.UnknownValueError:
        logger.error("Speech recognition could not understand audio")
        return jsonify({'error': 'Could not understand audio'}), 400
    except sr.RequestError as e:
        logger.error(f"Speech recognition service error: {e}")
        return jsonify({'error': 'Speech recognition service error'}), 503
    except Exception as e:
        # Use the module logger (with traceback) instead of print().
        logger.exception(f"Error in handle_voice: {str(e)}")
        return jsonify({'error': str(e)}), 400
|
242 |
# Dev/Spaces entry point: bind to all interfaces on port 7860
# (the Hugging Face Spaces default) when run directly.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)
|