# Hugging Face Space: anshharora/Luna_AI (status: Sleeping) - file size: 8,396 bytes

import base64
import io
import logging
import os
import tempfile
import uuid

import speech_recognition as sr
from flask import Flask, jsonify, render_template, request
from pydub import AudioSegment

# NOTE(review): load_dotenv, Groq and gTTS are used below but no import for
# them is visible in this file - confirm they are imported (presumably from
# the python-dotenv, groq and gTTS packages).

# Configure logging once at import time and create this module's logger.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# PyAudio is optional: the import below only probes whether it is installed.
# Report its absence through the module logger so the message ends up in the
# same place as the rest of the application's diagnostics.
try:
    import pyaudio  # noqa: F401
except ImportError:
    logger.warning("Warning: PyAudio not available, speech functionality will be limited")

# Flask application object; static assets are served from the "static" folder.
app = Flask(__name__, static_folder="static")

# Must run before the environment lookup below.
# NOTE(review): presumably python-dotenv's load_dotenv, which reads a .env
# file into the process environment - its import is not visible here.
load_dotenv()

# Groq chat-completion settings: model name, API key and shared client.
MODEL = "llama3-70b-8192"
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)

# Module-level recognizer used by speech_to_text().
recognizer = sr.Recognizer()

def init_speech_recognition():
    """Build a new ``sr.Recognizer``.

    Returns:
        The freshly created recognizer, or ``None`` when construction raises;
        the failure is logged at error level.
    """
    try:
        return sr.Recognizer()
    except Exception as e:
        logger.error(f"Failed to initialize speech recognition: {e}")
    return None
        
# In-memory conversation history: maps a conversation_id to the list of chat
# messages ({"role": ..., "content": ...}) exchanged so far. It lives only in
# this process; nothing in the visible source evicts or persists entries.
conversations = {}

def load_base_prompt():
    """Return the system prompt stored in ``base_prompt.txt``.

    The file is looked up relative to the current working directory and its
    content is returned with leading/trailing whitespace stripped.

    Returns:
        str: The file's content, or a built-in default prompt when the file
        does not exist.
    """
    try:
        # Explicit UTF-8 so the prompt decodes the same way regardless of the
        # host's locale encoding.
        with open("base_prompt.txt", "r", encoding="utf-8") as file:
            return file.read().strip()
    except FileNotFoundError:
        logger.error("Error: base_prompt.txt file not found.")
        return "You are a helpful assistant for language learning."

# Load the base prompt once at import time; chat_with_groq() uses it as the
# system message that opens every new conversation.
base_prompt = load_base_prompt()

def chat_with_groq(user_message, conversation_id=None):
    """Send a user message to the Groq chat model and return its reply.

    Args:
        user_message: Text of the user's turn.
        conversation_id: Key into the module-level ``conversations`` store.
            When falsy, the exchange is not persisted.

    Returns:
        str: The assistant's reply with surrounding whitespace stripped, or
        an apology string that includes the error text if anything fails.
    """
    try:
        # Work on a copy so a failed request does not leave an unanswered
        # user turn in the stored history.
        messages = list(conversations.get(conversation_id, []))
        if not messages:
            # A new conversation always starts with the system prompt.
            messages.append({"role": "system", "content": base_prompt})

        messages.append({"role": "user", "content": user_message})

        completion = client.chat.completions.create(
            model=MODEL,
            messages=messages,
            temperature=0.1,
            max_tokens=1024,
        )

        assistant_message = completion.choices[0].message.content.strip()
        messages.append({"role": "assistant", "content": assistant_message})

        # Commit the history only after a successful round trip.
        if conversation_id:
            conversations[conversation_id] = messages

        return assistant_message
    except Exception as e:
        logger.exception("Error in chat_with_groq: %s", e)
        return f"I apologize, but I'm having trouble responding right now. Error: {str(e)}"

def text_to_speech(text):
    """Synthesize ``text`` as English speech into an in-memory buffer.

    Args:
        text: The text to speak.

    Returns:
        io.BytesIO: A buffer holding the audio written by gTTS, rewound to
        position 0, or ``None`` when there is nothing to speak or synthesis
        fails.
    """
    try:
        # Nothing to synthesize: skip the TTS call instead of relying on it
        # to raise for empty input.
        if not text or not text.strip():
            return None

        tts = gTTS(text=text, lang='en')
        audio_io = io.BytesIO()
        tts.write_to_fp(audio_io)
        audio_io.seek(0)  # rewind so callers can read from the beginning
        return audio_io
    except Exception as e:
        logger.exception("Error in text_to_speech: %s", e)
        return None

def speech_to_text(audio_file):
    """Transcribe an uploaded audio file with the module-level recognizer.

    Args:
        audio_file: Uploaded file object exposing ``save(path)``
            (presumably a Werkzeug ``FileStorage`` - confirm against callers).

    Returns:
        The recognized text on success; the literal message
        ``"Could not understand audio"`` or ``"Could not request results; ..."``
        when recognition fails; ``None`` on any other error.
    """
    temp_path = None
    try:
        # Reserve a temporary file name and close the handle before writing,
        # so the path can be reopened on platforms that lock open files.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
            temp_path = temp_audio.name
        audio_file.save(temp_path)

        with sr.AudioFile(temp_path) as source:
            # NOTE(review): energy thresholds are listening-related settings;
            # confirm they have any effect on record() of a whole file.
            recognizer.dynamic_energy_threshold = True
            recognizer.energy_threshold = 4000

            # Read the entire audio file into memory.
            audio = recognizer.record(source)

            return recognizer.recognize_google(audio, language='en-US')

    except sr.UnknownValueError:
        return "Could not understand audio"
    except sr.RequestError as e:
        return f"Could not request results; {str(e)}"
    except Exception as e:
        logger.exception("Error in speech_to_text: %s", e)
        return None
    finally:
        # Best-effort cleanup; only attempted if the temp file was created.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError as cleanup_error:
                logger.debug("Could not remove temporary file %s: %s",
                             temp_path, cleanup_error)

@app.route('/')
def index():
    """Serve the front-end page rendered from the ``index.html`` template."""
    return render_template('index.html')

@app.route('/api/chat', methods=['POST'])
def chat():
    """Handle a text chat turn posted as JSON.

    Expects a JSON object with a ``message`` string and an optional
    ``conversation_id``. A new id is generated when none is supplied.

    Returns:
        A JSON response with ``response`` and ``conversation_id`` (plus a
        base64-encoded ``voice_response`` when speech synthesis succeeded);
        ``{'error': ...}`` with status 400 for a missing/invalid body or
        empty message, or status 500 on an unexpected failure.
    """
    try:
        # silent=True yields None instead of raising on a missing or
        # malformed body, so bad input is reported as 400 rather than 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'error': 'Invalid or missing JSON body'}), 400

        user_message = data.get('message', '')
        # ``or`` also covers an explicit null/empty id, which would otherwise
        # disable history storage in chat_with_groq().
        conversation_id = data.get('conversation_id') or str(uuid.uuid4())

        if not user_message:
            return jsonify({'error': 'No message provided'}), 400

        # Get response from Groq
        response = chat_with_groq(user_message, conversation_id)

        # Generate voice response
        audio_io = text_to_speech(response)
        result = {
            'response': response,
            'conversation_id': conversation_id
        }

        # The audio is optional: text_to_speech() returns None on failure.
        if audio_io:
            audio_base64 = base64.b64encode(audio_io.getvalue()).decode('utf-8')
            result['voice_response'] = audio_base64

        return jsonify(result)

    except Exception as e:
        logger.exception("General error in chat: %s", e)
        return jsonify({'error': str(e)}), 500

@app.route('/api/voice', methods=['POST'])
def handle_voice():
    """Transcribe an uploaded audio clip, answer it, and return text plus speech.

    Expects a multipart POST with an ``audio`` file and an optional
    ``conversation_id`` form field. The upload is converted to WAV with
    pydub, transcribed with ``recognize_google``, and the transcript is sent
    to ``chat_with_groq``.

    Returns:
        A JSON response with ``text``, ``response`` and ``conversation_id``
        (plus a base64-encoded ``voice_response`` when speech synthesis
        succeeded); otherwise ``{'error': ...}`` with status 400 (missing,
        unintelligible or unprocessable audio), 503 (recognition unavailable
        or service error) or 500 (unexpected failure).
    """
    try:
        if 'audio' not in request.files:
            logger.error("No audio file in request")
            return jsonify({'error': 'No audio file provided'}), 400

        audio_file = request.files['audio']
        # ``or`` also covers an empty form field, which would otherwise
        # disable history storage in chat_with_groq().
        conversation_id = request.form.get('conversation_id') or str(uuid.uuid4())

        # Log incoming request details
        logger.debug("Received audio file: %s, Content type: %s",
                     audio_file.filename, audio_file.content_type)

        # Both the upload and the converted file live in a directory that is
        # removed when this block exits.
        with tempfile.TemporaryDirectory() as temp_dir:
            # Save incoming audio
            input_path = os.path.join(temp_dir, 'input.webm')
            audio_file.save(input_path)
            logger.debug("Saved audio file to: %s", input_path)

            try:
                # Convert audio using pydub; the extra ffmpeg flags request
                # one channel (-ac 1) at a 16000 Hz sample rate (-ar 16000).
                audio = AudioSegment.from_file(input_path)
                output_path = os.path.join(temp_dir, 'output.wav')
                audio.export(output_path, format="wav",
                             parameters=["-ac", "1", "-ar", "16000"])
                logger.debug("Audio conversion successful")

                # Create the recognizer on first use and cache it on the app.
                if not hasattr(app, 'recognizer'):
                    app.recognizer = init_speech_recognition()

                if not app.recognizer:
                    return jsonify({'error': 'Speech recognition unavailable'}), 503

                # Perform speech recognition
                with sr.AudioFile(output_path) as source:
                    audio_data = app.recognizer.record(source)
                    text = app.recognizer.recognize_google(audio_data)
                    logger.debug("Speech recognition result: %s", text)

                if not text:
                    return jsonify({'error': 'Could not transcribe audio'}), 400

                # Get chatbot response
                response = chat_with_groq(text, conversation_id)

                # Generate voice response
                audio_io = text_to_speech(response)
                result = {
                    'text': text,
                    'response': response,
                    'conversation_id': conversation_id
                }

                # The audio is optional: text_to_speech() returns None on failure.
                if audio_io:
                    audio_base64 = base64.b64encode(audio_io.getvalue()).decode('utf-8')
                    result['voice_response'] = audio_base64

                return jsonify(result)

            except sr.UnknownValueError:
                logger.error("Speech recognition could not understand audio")
                return jsonify({'error': 'Could not understand audio'}), 400
            except sr.RequestError as e:
                logger.error("Speech recognition service error: %s", e)
                return jsonify({'error': 'Speech recognition service error'}), 503
            except Exception as e:
                # Keep the traceback: conversion and chat failures both land here.
                logger.exception("Audio processing error: %s", e)
                return jsonify({'error': f'Error processing audio: {str(e)}'}), 400

    except Exception as e:
        logger.exception("General error in handle_voice: %s", e)
        return jsonify({'error': str(e)}), 500
# When executed as a script, start Flask's built-in server listening on all
# network interfaces (0.0.0.0) on port 7860.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)