anshharora committed on
Commit
d4c64bc
·
verified ·
1 Parent(s): b0dba38

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -21
app.py CHANGED
@@ -1,15 +1,15 @@
1
- from flask import Flask, request, jsonify, render_template
2
- from dotenv import load_dotenv
3
- from groq import Groq
4
- import os
5
- import uuid
6
- from gtts import gTTS
7
- import io
8
- import base64
9
  import speech_recognition as sr
 
 
10
  import tempfile
11
- import json
12
  from pydub import AudioSegment
 
 
 
 
 
 
13
 
14
 
15
  try:
@@ -31,6 +31,15 @@ MODEL = "llama3-70b-8192"
31
  # Initialize speech recognition
32
  recognizer = sr.Recognizer()
33
 
 
 
 
 
 
 
 
 
 
34
  # Store conversation history
35
  conversations = {}
36
 
@@ -157,32 +166,47 @@ def chat():
157
  def handle_voice():
158
  try:
159
  if 'audio' not in request.files:
 
160
  return jsonify({'error': 'No audio file provided'}), 400
161
 
162
  audio_file = request.files['audio']
163
  conversation_id = request.form.get('conversation_id', str(uuid.uuid4()))
164
 
165
- # Create a temporary directory to handle audio processing
 
 
 
166
  with tempfile.TemporaryDirectory() as temp_dir:
167
- # Save the incoming WebM file
168
  input_path = os.path.join(temp_dir, 'input.webm')
169
  audio_file.save(input_path)
 
170
 
171
  try:
172
- # Convert WebM to WAV using pydub
173
- audio = AudioSegment.from_file(input_path, format="webm")
174
  output_path = os.path.join(temp_dir, 'output.wav')
175
- audio.export(output_path, format="wav")
 
 
176
 
177
- # Use the converted file for speech recognition
 
 
 
 
 
 
 
178
  with sr.AudioFile(output_path) as source:
179
- audio_data = recognizer.record(source)
180
- text = recognizer.recognize_google(audio_data)
 
181
 
182
  if not text:
183
  return jsonify({'error': 'Could not transcribe audio'}), 400
184
 
185
- # Get response from Groq
186
  response = chat_with_groq(text, conversation_id)
187
 
188
  # Generate voice response
@@ -199,12 +223,18 @@ def handle_voice():
199
 
200
  return jsonify(result)
201
 
 
 
 
 
 
 
202
  except Exception as e:
203
- print(f"Error processing audio: {str(e)}")
204
  return jsonify({'error': f'Error processing audio: {str(e)}'}), 400
205
 
206
  except Exception as e:
207
- print(f"Error in handle_voice: {str(e)}")
208
- return jsonify({'error': str(e)}), 400
209
  if __name__ == '__main__':
210
  app.run(host='0.0.0.0', port=7860)
 
1
+ from flask import Flask, request, jsonify
 
 
 
 
 
 
 
2
  import speech_recognition as sr
3
+ import io
4
+ import os
5
  import tempfile
 
6
  from pydub import AudioSegment
7
+ import logging
8
+
9
+ # Set up logging
10
+ logging.basicConfig(level=logging.DEBUG)
11
+ logger = logging.getLogger(__name__)
12
+
13
 
14
 
15
  try:
 
31
  # Initialize speech recognition
32
  recognizer = sr.Recognizer()
33
 
34
+ def init_speech_recognition():
35
+ """Initialize speech recognition with fallback options"""
36
+ try:
37
+ recognizer = sr.Recognizer()
38
+ return recognizer
39
+ except Exception as e:
40
+ logger.error(f"Failed to initialize speech recognition: {e}")
41
+ return None
42
+
43
  # Store conversation history
44
  conversations = {}
45
 
 
166
  def handle_voice():
167
  try:
168
  if 'audio' not in request.files:
169
+ logger.error("No audio file in request")
170
  return jsonify({'error': 'No audio file provided'}), 400
171
 
172
  audio_file = request.files['audio']
173
  conversation_id = request.form.get('conversation_id', str(uuid.uuid4()))
174
 
175
+ # Log incoming request details
176
+ logger.debug(f"Received audio file: {audio_file.filename}, "
177
+ f"Content type: {audio_file.content_type}")
178
+
179
  with tempfile.TemporaryDirectory() as temp_dir:
180
+ # Save incoming audio
181
  input_path = os.path.join(temp_dir, 'input.webm')
182
  audio_file.save(input_path)
183
+ logger.debug(f"Saved audio file to: {input_path}")
184
 
185
  try:
186
+ # Convert audio using pydub
187
+ audio = AudioSegment.from_file(input_path)
188
  output_path = os.path.join(temp_dir, 'output.wav')
189
+ audio.export(output_path, format="wav",
190
+ parameters=["-ac", "1", "-ar", "16000"])
191
+ logger.debug("Audio conversion successful")
192
 
193
+ # Initialize recognition if not already done
194
+ if not hasattr(app, 'recognizer'):
195
+ app.recognizer = init_speech_recognition()
196
+
197
+ if not app.recognizer:
198
+ return jsonify({'error': 'Speech recognition unavailable'}), 503
199
+
200
+ # Perform speech recognition
201
  with sr.AudioFile(output_path) as source:
202
+ audio_data = app.recognizer.record(source)
203
+ text = app.recognizer.recognize_google(audio_data)
204
+ logger.debug(f"Speech recognition result: {text}")
205
 
206
  if not text:
207
  return jsonify({'error': 'Could not transcribe audio'}), 400
208
 
209
+ # Get chatbot response
210
  response = chat_with_groq(text, conversation_id)
211
 
212
  # Generate voice response
 
223
 
224
  return jsonify(result)
225
 
226
+ except sr.UnknownValueError:
227
+ logger.error("Speech recognition could not understand audio")
228
+ return jsonify({'error': 'Could not understand audio'}), 400
229
+ except sr.RequestError as e:
230
+ logger.error(f"Speech recognition service error: {e}")
231
+ return jsonify({'error': 'Speech recognition service error'}), 503
232
  except Exception as e:
233
+ logger.error(f"Audio processing error: {e}")
234
  return jsonify({'error': f'Error processing audio: {str(e)}'}), 400
235
 
236
  except Exception as e:
237
+ logger.error(f"General error in handle_voice: {e}")
238
+ return jsonify({'error': str(e)}), 500
239
  if __name__ == '__main__':
240
  app.run(host='0.0.0.0', port=7860)