"""Flask voice/text chatbot service backed by Groq chat completions."""

import base64
import io
import logging
import os
import tempfile
import uuid

import speech_recognition as sr
from dotenv import load_dotenv
from flask import Flask, jsonify, render_template, request
from groq import Groq
from gtts import gTTS
from pydub import AudioSegment

# Set up logging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

try:
    import pyaudio  # noqa: F401  (imported only to probe availability)
except ImportError:
    logger.warning(
        "Warning: PyAudio not available, speech functionality will be limited"
    )

# Initialize Flask app
app = Flask(__name__, static_folder='static')

# Load environment variables (must run before reading GROQ_API_KEY below)
load_dotenv()

# Groq API Configuration
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)
MODEL = "llama3-70b-8192"

# Initialize speech recognition
recognizer = sr.Recognizer()


def init_speech_recognition():
    """Create a fresh speech recognizer.

    Returns:
        A new ``sr.Recognizer`` instance, or ``None`` if construction
        raised; the failure is logged rather than propagated.
    """
    try:
        # Return directly: binding a local named ``recognizer`` would only
        # shadow the module-level instance without updating it.
        return sr.Recognizer()
    except Exception as e:
        logger.error(f"Failed to initialize speech recognition: {e}")
        return None


# Store conversation history: conversation_id -> list of chat message dicts
conversations = {}


def load_base_prompt():
    """Read the system prompt from ``base_prompt.txt``.

    The path is relative, so it is resolved against the current working
    directory.

    Returns:
        The stripped file contents, or a generic fallback prompt when the
        file does not exist.
    """
    try:
        # Explicit encoding so the prompt decodes the same on every platform.
        with open("base_prompt.txt", "r", encoding="utf-8") as file:
            return file.read().strip()
    except FileNotFoundError:
        logger.error("Error: base_prompt.txt file not found.")
        return "You are a helpful assistant for language learning."
# Load the base prompt
base_prompt = load_base_prompt()


def chat_with_groq(user_message, conversation_id=None):
    """Send a user message to the Groq chat model and return its reply.

    Args:
        user_message: Text of the user's turn.
        conversation_id: Key into ``conversations``. When falsy, the
            exchange is not stored.

    Returns:
        The assistant's reply text, or an apology string containing the
        error text if anything raised.
    """
    try:
        # Work on a copy: the stored history is only replaced after a
        # successful completion, so a failed request cannot leave a
        # dangling user turn behind.
        messages = list(conversations.get(conversation_id, []))
        if not messages:
            messages.append({"role": "system", "content": base_prompt})

        # Add user message
        messages.append({"role": "user", "content": user_message})

        # Get completion from Groq
        completion = client.chat.completions.create(
            model=MODEL,
            messages=messages,
            temperature=0.1,
            max_tokens=1024,
        )

        # Guard against a missing content field before stripping.
        assistant_message = (completion.choices[0].message.content or "").strip()
        messages.append({"role": "assistant", "content": assistant_message})

        # Update conversation history
        if conversation_id:
            conversations[conversation_id] = messages

        return assistant_message
    except Exception as e:
        logger.exception("Error in chat_with_groq: %s", e)
        return f"I apologize, but I'm having trouble responding right now. Error: {str(e)}"


def text_to_speech(text):
    """Synthesize ``text`` as English speech with gTTS.

    Returns:
        An ``io.BytesIO`` rewound to position 0 holding the audio gTTS
        wrote, or ``None`` if synthesis raised.
    """
    try:
        tts = gTTS(text=text, lang='en')
        audio_io = io.BytesIO()
        tts.write_to_fp(audio_io)
        audio_io.seek(0)
        return audio_io
    except Exception as e:
        logger.exception("Error in text_to_speech: %s", e)
        return None


def speech_to_text(audio_file):
    """Transcribe an uploaded WAV file with ``recognizer.recognize_google``.

    Args:
        audio_file: Upload object exposing ``save(path)``.

    Returns:
        The transcript; a human-readable message when the audio could not
        be understood or the recognition request failed; ``None`` on any
        other error.
    """
    temp_path = None
    try:
        # Create the temp file and close its handle before saving into it,
        # so the path is not held open while another writer uses it.
        fd, temp_path = tempfile.mkstemp(suffix='.wav')
        os.close(fd)
        audio_file.save(temp_path)

        with sr.AudioFile(temp_path) as source:
            # Adjust recognition settings
            recognizer.dynamic_energy_threshold = True
            recognizer.energy_threshold = 4000
            # Record the entire audio file
            audio = recognizer.record(source)

        return recognizer.recognize_google(audio, language='en-US')
    except sr.UnknownValueError:
        return "Could not understand audio"
    except sr.RequestError as e:
        return f"Could not request results; {str(e)}"
    except Exception as e:
        logger.exception("Error in speech_to_text: %s", e)
        return None
    finally:
        # Best-effort cleanup; only attempted if the file was created.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                pass


def _build_reply(response, conversation_id, text=None):
    """Assemble the JSON payload shared by the chat and voice endpoints.

    Adds ``text`` (the transcript) when given, and ``voice_response``
    (base64-encoded audio) when speech synthesis succeeded.
    """
    result = {'response': response, 'conversation_id': conversation_id}
    if text is not None:
        result['text'] = text

    audio_io = text_to_speech(response)
    if audio_io is not None:
        result['voice_response'] = base64.b64encode(audio_io.getvalue()).decode('utf-8')
    return result


@app.route('/')
def index():
    """Serve the front-end page."""
    return render_template('index.html')


@app.route('/api/chat', methods=['POST'])
def chat():
    """Handle a text chat turn.

    Expects a JSON object with ``message`` and optional
    ``conversation_id``. Responds with the reply payload, 400 when no
    usable message is supplied, or 500 on unexpected errors.
    """
    try:
        # silent=True: a missing or malformed JSON body becomes a 400 below
        # instead of surfacing as a 500 from the generic handler.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        user_message = data.get('message', '')
        # ``or`` also covers an explicit null/empty id, which would
        # otherwise disable history storage for this conversation.
        conversation_id = data.get('conversation_id') or str(uuid.uuid4())

        if not isinstance(user_message, str) or not user_message.strip():
            return jsonify({'error': 'No message provided'}), 400

        # Get response from Groq
        response = chat_with_groq(user_message, conversation_id)

        return jsonify(_build_reply(response, conversation_id))
    except Exception as e:
        logger.exception("General error in chat: %s", e)
        return jsonify({'error': str(e)}), 500


@app.route('/api/voice', methods=['POST'])
def handle_voice():
    """Handle a voice chat turn.

    Expects a multipart upload with an ``audio`` file and optional
    ``conversation_id`` form field. The audio is converted to mono 16 kHz
    WAV, transcribed, and the transcript is sent to the chatbot.

    Responds with the reply payload (including the transcript as
    ``text``), 400 for missing/unintelligible/unprocessable audio, 503
    when recognition is unavailable or its service fails, or 500 on
    unexpected errors.
    """
    try:
        if 'audio' not in request.files:
            logger.error("No audio file in request")
            return jsonify({'error': 'No audio file provided'}), 400

        audio_file = request.files['audio']
        conversation_id = request.form.get('conversation_id') or str(uuid.uuid4())

        # Log incoming request details
        logger.debug(
            "Received audio file: %s, Content type: %s",
            audio_file.filename,
            audio_file.content_type,
        )

        with tempfile.TemporaryDirectory() as temp_dir:
            # Save incoming audio
            input_path = os.path.join(temp_dir, 'input.webm')
            audio_file.save(input_path)
            logger.debug("Saved audio file to: %s", input_path)

            try:
                # Convert audio using pydub
                audio = AudioSegment.from_file(input_path)
                output_path = os.path.join(temp_dir, 'output.wav')
                audio.export(
                    output_path,
                    format="wav",
                    parameters=["-ac", "1", "-ar", "16000"],
                )
                logger.debug("Audio conversion successful")

                # Initialize recognition if not already done. A previous
                # failed attempt (None) is retried rather than cached.
                if getattr(app, 'recognizer', None) is None:
                    app.recognizer = init_speech_recognition()
                if app.recognizer is None:
                    return jsonify({'error': 'Speech recognition unavailable'}), 503

                # Perform speech recognition
                with sr.AudioFile(output_path) as source:
                    audio_data = app.recognizer.record(source)
                text = app.recognizer.recognize_google(audio_data)
                logger.debug("Speech recognition result: %s", text)

                if not text:
                    return jsonify({'error': 'Could not transcribe audio'}), 400

                # Get chatbot response
                response = chat_with_groq(text, conversation_id)

                return jsonify(_build_reply(response, conversation_id, text=text))
            except sr.UnknownValueError:
                logger.error("Speech recognition could not understand audio")
                return jsonify({'error': 'Could not understand audio'}), 400
            except sr.RequestError as e:
                logger.error("Speech recognition service error: %s", e)
                return jsonify({'error': 'Speech recognition service error'}), 503
            except Exception as e:
                logger.error("Audio processing error: %s", e)
                return jsonify({'error': f'Error processing audio: {str(e)}'}), 400
    except Exception as e:
        logger.error("General error in handle_voice: %s", e)
        return jsonify({'error': str(e)}), 500


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)