Spaces:

anshharora
/

Luna_AI

Running

App Files Community

anshharora committed on Jan 9

Commit

b19a42f

verified ·

1 Parent(s): a246fe4

Update app.py

Browse files

Files changed (1) hide show

app.py +77 -76

app.py CHANGED Viewed

@@ -1,45 +1,19 @@
 from flask import Flask, request, jsonify, render_template
 import os
 import uuid
-import base64
-import logging
-from dotenv import load_dotenv
-import io
-import tempfile
 from gtts import gTTS
-from groq import Groq
 import speech_recognition as sr
-from pydub import AudioSegment
-# Set up logging
-logging.basicConfig(level=logging.DEBUG)
-logger = logging.getLogger(__name__)
-class AudioProcessor:
-    def __init__(self):
-        self.sample_rate = 16000
-        self.channels = 1
-    def process_audio(self, audio_file):
-        """Process incoming audio file and convert to proper format"""
-        try:
-            with tempfile.TemporaryDirectory() as temp_dir:
-                # Save incoming audio
-                input_path = os.path.join(temp_dir, 'input.webm')
-                audio_file.save(input_path)
-                # Convert to WAV using pydub
-                audio = AudioSegment.from_file(input_path)
-                audio = audio.set_channels(self.channels)
-                audio = audio.set_frame_rate(self.sample_rate)
-                output_path = os.path.join(temp_dir, 'output.wav')
-                audio.export(output_path, format='wav')
-                return output_path
-        except Exception as e:
-            logger.error(f"Error processing audio: {e}")
-            raise
 # Initialize Flask app
 app = Flask(__name__, static_folder='static')
@@ -63,7 +37,7 @@ def load_base_prompt():
         with open("base_prompt.txt", "r") as file:
             return file.read().strip()
     except FileNotFoundError:
-        logger.warning("base_prompt.txt not found, using default prompt")
         return "You are a helpful assistant for language learning."
 # Load the base prompt
@@ -97,7 +71,7 @@ def chat_with_groq(user_message, conversation_id=None):
         return assistant_message
     except Exception as e:
-        logger.error(f"Error in chat_with_groq: {e}")
         return f"I apologize, but I'm having trouble responding right now. Error: {str(e)}"
 def text_to_speech(text):
@@ -108,12 +82,17 @@ def text_to_speech(text):
         audio_io.seek(0)
         return audio_io
     except Exception as e:
-        logger.error(f"Error in text_to_speech: {e}")
         return None
-def speech_to_text(audio_path):
     try:
-        with sr.AudioFile(audio_path) as source:
             # Adjust recognition settings
             recognizer.dynamic_energy_threshold = True
             recognizer.energy_threshold = 4000
@@ -121,18 +100,23 @@ def speech_to_text(audio_path):
             # Record the entire audio file
             audio = recognizer.record(source)
-            # Perform recognition
             text = recognizer.recognize_google(audio, language='en-US')
             return text
     except sr.UnknownValueError:
         return "Could not understand audio"
     except sr.RequestError as e:
-        logger.error(f"Speech recognition request error: {e}")
         return f"Could not request results; {str(e)}"
     except Exception as e:
-        logger.error(f"Error in speech_to_text: {e}")
         return None
 @app.route('/')
 def index():
@@ -165,7 +149,6 @@ def chat():
         return jsonify(result)
     except Exception as e:
-        logger.error(f"Error in chat endpoint: {e}")
         return jsonify({'error': str(e)}), 500
 @app.route('/api/voice', methods=['POST'])
@@ -177,36 +160,54 @@ def handle_voice():
         audio_file = request.files['audio']
         conversation_id = request.form.get('conversation_id', str(uuid.uuid4()))
-        # Process audio
-        audio_processor = AudioProcessor()
-        wav_path = audio_processor.process_audio(audio_file)
-        # Perform speech recognition
-        text = speech_to_text(wav_path)
-        if not text:
-            return jsonify({'error': 'Could not transcribe audio'}), 400
-        # Get chatbot response
-        response = chat_with_groq(text, conversation_id)
-        # Generate voice response
-        audio_io = text_to_speech(response)
-        result = {
-            'text': text,
-            'response': response,
-            'conversation_id': conversation_id
-        }
-        if audio_io:
-            audio_base64 = base64.b64encode(audio_io.getvalue()).decode('utf-8')
-            result['voice_response'] = audio_base64
-        return jsonify(result)
     except Exception as e:
-        logger.error(f"Error in handle_voice: {e}")
-        return jsonify({'error': str(e)}), 400
 if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=7860)

 from flask import Flask, request, jsonify, render_template
+from dotenv import load_dotenv
+from groq import Groq
 import os
 import uuid
 from gtts import gTTS
+import io
+import base64
 import speech_recognition as sr
+import tempfile
+import json
+try:
+    import pyaudio
+except ImportError:
+    print("Warning: PyAudio not available, speech functionality will be limited")
 # Initialize Flask app
 app = Flask(__name__, static_folder='static')
         with open("base_prompt.txt", "r") as file:
             return file.read().strip()
     except FileNotFoundError:
+        print("Error: base_prompt.txt file not found.")
         return "You are a helpful assistant for language learning."
 # Load the base prompt
         return assistant_message
     except Exception as e:
+        print(f"Error in chat_with_groq: {str(e)}")
         return f"I apologize, but I'm having trouble responding right now. Error: {str(e)}"
 def text_to_speech(text):
         audio_io.seek(0)
         return audio_io
     except Exception as e:
+        print(f"Error in text_to_speech: {str(e)}")
         return None
+def speech_to_text(audio_file):
     try:
+        # Save the uploaded audio to a temporary file
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
+            audio_file.save(temp_audio.name)
+        # Use SpeechRecognition to convert speech to text
+        with sr.AudioFile(temp_audio.name) as source:
             # Adjust recognition settings
             recognizer.dynamic_energy_threshold = True
             recognizer.energy_threshold = 4000
             # Record the entire audio file
             audio = recognizer.record(source)
+            # Perform recognition with increased timeout
             text = recognizer.recognize_google(audio, language='en-US')
             return text
     except sr.UnknownValueError:
         return "Could not understand audio"
     except sr.RequestError as e:
         return f"Could not request results; {str(e)}"
     except Exception as e:
+        print(f"Error in speech_to_text: {str(e)}")
         return None
+    finally:
+        # Clean up temporary file
+        try:
+            os.unlink(temp_audio.name)
+        except:
+            pass
 @app.route('/')
 def index():
         return jsonify(result)
     except Exception as e:
         return jsonify({'error': str(e)}), 500
 @app.route('/api/voice', methods=['POST'])
         audio_file = request.files['audio']
         conversation_id = request.form.get('conversation_id', str(uuid.uuid4()))
+        # Save the audio file temporarily with a .wav extension
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
+            audio_file.save(temp_audio.name)
+            # Use FFmpeg to convert the audio to the correct format
+            output_path = temp_audio.name + '_converted.wav'
+            os.system(f'ffmpeg -i {temp_audio.name} -acodec pcm_s16le -ac 1 -ar 16000 {output_path}')
+            try:
+                # Use the converted file for speech recognition
+                with sr.AudioFile(output_path) as source:
+                    audio = recognizer.record(source)
+                    text = recognizer.recognize_google(audio)
+                if not text:
+                    return jsonify({'error': 'Could not transcribe audio'}), 400
+                # Get response from Groq
+                response = chat_with_groq(text, conversation_id)
+                # Generate voice response
+                audio_io = text_to_speech(response)
+                result = {
+                    'text': text,
+                    'response': response,
+                    'conversation_id': conversation_id
+                }
+                if audio_io:
+                    audio_base64 = base64.b64encode(audio_io.getvalue()).decode('utf-8')
+                    result['voice_response'] = audio_base64
+                return jsonify(result)
+            finally:
+                # Clean up temporary files
+                try:
+                    os.remove(temp_audio.name)
+                    os.remove(output_path)
+                except:
+                    pass
+    except sr.UnknownValueError:
+        return jsonify({'error': 'Could not understand audio'}), 400
+    except sr.RequestError as e:
+        return jsonify({'error': f'Could not request results: {str(e)}'}), 400
     except Exception as e:
+        print(f"Error in speech_to_text: {str(e)}")
+        return jsonify({'error': str(e)}), 400
 if __name__ == '__main__':
+    app.run(host='0.0.0.0', port=7860)