Spaces:

anshharora
/

Luna_AI

Sleeping

App Files Files Community

Luna_AI / app.py

anshharora

Update app.py

7d18b6a verified 6 months ago

raw

history blame

7.6 kB

	import base64
	import io
	import logging
	import os
	import tempfile
	import uuid

	import numpy as np
	import scipy.io.wavfile as wav
	import sounddevice as sd
	from pydub import AudioSegment


	# Set up logging: configure the root logger at DEBUG level and create the
	# module-level logger used by the functions in this file.
	# NOTE(review): DEBUG is verbose for a deployed app — confirm this is intended.
	logging.basicConfig(level=logging.DEBUG)
	logger = logging.getLogger(__name__)

	class AudioProcessor:
	    """Normalise uploaded audio to WAV and record audio from the local device.

	    Args:
	        sample_rate: Target frame rate in Hz for conversion and recording.
	        channels: Target number of audio channels.
	    """

	    def __init__(self, sample_rate=16000, channels=1):
	        self.sample_rate = sample_rate
	        self.channels = channels

	    def process_audio(self, audio_file):
	        """Convert an uploaded audio file to WAV at the configured format.

	        Args:
	            audio_file: Object exposing ``save(path)`` (the caller passes an
	                entry of ``request.files``).

	        Returns:
	            Path of the converted WAV file. The file is created outside the
	            scratch directory so it still exists after this method returns;
	            the caller is responsible for deleting it.

	        Raises:
	            Exception: Whatever saving or pydub conversion raises; the
	                partially written output file is removed first.
	        """
	        # The result must outlive this call, so it cannot live inside the
	        # TemporaryDirectory below, which is deleted when the block exits.
	        fd, output_path = tempfile.mkstemp(suffix='.wav')
	        os.close(fd)

	        try:
	            with tempfile.TemporaryDirectory() as temp_dir:
	                # Save incoming audio
	                input_path = os.path.join(temp_dir, 'input.webm')
	                audio_file.save(input_path)

	                # Convert to WAV using pydub
	                audio = AudioSegment.from_file(input_path)
	                audio = audio.set_channels(self.channels)
	                audio = audio.set_frame_rate(self.sample_rate)

	                # export() hands back the open output file; close it so the
	                # data is flushed and the file can be removed later.
	                exported = audio.export(output_path, format='wav')
	                exported.close()
	        except Exception:
	            # Do not leave an orphaned output file behind on failure.
	            try:
	                os.remove(output_path)
	            except OSError:
	                pass  # best-effort cleanup; the original error matters more
	            raise

	        return output_path

	    def record_audio(self, duration=5):
	        """Record audio using sounddevice.

	        Args:
	            duration: Recording length in seconds.

	        Returns:
	            The buffer returned by ``sd.rec`` once ``sd.wait()`` has returned.
	        """
	        recording = sd.rec(
	            int(duration * self.sample_rate),
	            samplerate=self.sample_rate,
	            channels=self.channels,
	        )
	        sd.wait()
	        return recording




	# Probe for PyAudio. The module is not used anywhere else in the visible
	# code; a failed import only prints a warning and startup continues.
	try:
	    import pyaudio
	except ImportError:
	    print("Warning: PyAudio not available, speech functionality will be limited")

	# Initialize Flask app; static assets are served from the 'static' folder.
	app = Flask(__name__, static_folder='static')

	# Load environment variables. This must run before GROQ_API_KEY is read below.
	# NOTE(review): presumably python-dotenv's load_dotenv — confirm the import.
	load_dotenv()

	# Groq API Configuration
	# GROQ_API_KEY is None when the environment variable is unset.
	GROQ_API_KEY = os.getenv("GROQ_API_KEY")
	client = Groq(api_key=GROQ_API_KEY)
	# Model identifier passed to client.chat.completions.create in chat_with_groq.
	MODEL = "llama3-70b-8192"

	# Initialize speech recognition: module-level recognizer used by speech_to_text.
	recognizer = sr.Recognizer()

	def init_speech_recognition():
	    """Create a new speech recognizer.

	    Returns:
	        A fresh ``sr.Recognizer`` instance, or ``None`` if construction
	        raised; the failure is logged as an error.
	    """
	    try:
	        return sr.Recognizer()
	    except Exception as exc:
	        logger.error(f"Failed to initialize speech recognition: {exc}")
	    return None

	# Store conversation history in memory: maps a conversation_id to the list of
	# {"role": ..., "content": ...} message dicts that chat_with_groq sends to the API.
	# NOTE(review): nothing in the visible code removes entries, so this grows
	# for the lifetime of the process and is lost on restart.
	conversations = {}

	def load_base_prompt(path="base_prompt.txt"):
	    """Read the system prompt from a text file.

	    Args:
	        path: Location of the prompt file. Defaults to ``base_prompt.txt``
	            in the current working directory.

	    Returns:
	        The file contents with surrounding whitespace stripped, or a generic
	        language-learning prompt when the file does not exist.
	    """
	    try:
	        # Decode explicitly as UTF-8 so the prompt reads the same regardless
	        # of the platform's default locale encoding.
	        with open(path, "r", encoding="utf-8") as file:
	            return file.read().strip()
	    except FileNotFoundError:
	        print(f"Error: {path} file not found.")
	        return "You are a helpful assistant for language learning."

	# Load the base prompt once at import time; chat_with_groq uses it as the
	# system message that opens every new conversation.
	base_prompt = load_base_prompt()

	def chat_with_groq(user_message, conversation_id=None):
	    """Send a user message to the Groq chat model and return its reply.

	    Args:
	        user_message: Text of the user's turn.
	        conversation_id: Key into ``conversations``. When falsy, the exchange
	            is not stored and every call starts from the base prompt.

	    Returns:
	        The assistant's reply with surrounding whitespace stripped. On any
	        failure, an apology string that includes the error text is returned
	        instead of raising.
	    """
	    try:
	        # Work on a copy of the stored history so a failed request does not
	        # leave a user turn without a matching assistant reply.
	        messages = list(conversations.get(conversation_id, []))
	        if not messages:
	            messages.append({"role": "system", "content": base_prompt})

	        # Add user message
	        messages.append({"role": "user", "content": user_message})

	        # Get completion from Groq
	        completion = client.chat.completions.create(
	            model=MODEL,
	            messages=messages,
	            temperature=0.1,
	            max_tokens=1024,
	        )

	        # NOTE(review): content is presumably optional in the response
	        # schema; treat a missing value as an empty reply — confirm.
	        assistant_message = (completion.choices[0].message.content or "").strip()
	        messages.append({"role": "assistant", "content": assistant_message})

	        # Commit the updated history only after the exchange succeeded.
	        if conversation_id:
	            conversations[conversation_id] = messages

	        return assistant_message
	    except Exception as e:
	        logger.exception("Error in chat_with_groq")
	        return f"I apologize, but I'm having trouble responding right now. Error: {str(e)}"

	def text_to_speech(text):
	    """Synthesise English speech for *text* with gTTS.

	    Returns:
	        An ``io.BytesIO`` holding the generated audio, rewound to the start,
	        or ``None`` if synthesis raised (the error is printed).
	    """
	    buffer = io.BytesIO()
	    try:
	        gTTS(text=text, lang='en').write_to_fp(buffer)
	    except Exception as e:
	        print(f"Error in text_to_speech: {str(e)}")
	        return None
	    buffer.seek(0)
	    return buffer

	def speech_to_text(audio_file):
	    """Transcribe an uploaded audio file with Google speech recognition.

	    Args:
	        audio_file: Object exposing ``save(path)``; it is written to a
	            temporary ``.wav`` file that is removed before returning.

	    Returns:
	        The recognised text. When recognition fails, a human-readable message
	        is returned as a plain string ("Could not understand audio" or
	        "Could not request results; ..."), so callers cannot tell it apart
	        from a transcript by type. ``None`` is returned on any other error.
	    """
	    temp_path = None
	    try:
	        # Reserve a temporary file name, then close the handle before writing
	        # so the upload is not saved onto a file that is still open.
	        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
	            temp_path = temp_audio.name
	        audio_file.save(temp_path)

	        # Use SpeechRecognition to convert speech to text
	        with sr.AudioFile(temp_path) as source:
	            # Adjust recognition settings on the shared module-level recognizer.
	            # NOTE(review): these thresholds look relevant only to live
	            # microphone listening, not to record() — confirm and drop if so.
	            recognizer.dynamic_energy_threshold = True
	            recognizer.energy_threshold = 4000

	            # Record the entire audio file
	            audio = recognizer.record(source)

	        # No timeout is configured here; the call uses the library defaults.
	        return recognizer.recognize_google(audio, language='en-US')

	    except sr.UnknownValueError:
	        return "Could not understand audio"
	    except sr.RequestError as e:
	        return f"Could not request results; {str(e)}"
	    except Exception:
	        logger.exception("Error in speech_to_text")
	        return None
	    finally:
	        # Clean up the temporary file if it was created.
	        if temp_path is not None:
	            try:
	                os.unlink(temp_path)
	            except OSError:
	                pass  # best-effort: the file may already be gone

	@app.route('/')
	def index():
	    """Render and return the ``index.html`` template for the root URL."""
	    page = render_template('index.html')
	    return page

	@app.route('/api/chat', methods=['POST'])
	def chat():
	    """Handle a text chat turn.

	    Expects a JSON object with ``message`` and, optionally,
	    ``conversation_id``. Responds with JSON containing ``response``,
	    ``conversation_id`` and, when speech synthesis succeeded,
	    ``voice_response`` (base64-encoded audio).

	    Returns 400 for a missing/invalid body or empty message and 500 for
	    unexpected errors.
	    """
	    try:
	        # silent=True yields None instead of raising on a missing or
	        # malformed JSON body, so bad input is a 400 rather than a 500.
	        data = request.get_json(silent=True)
	        if not isinstance(data, dict):
	            return jsonify({'error': 'Invalid or missing JSON body'}), 400

	        user_message = data.get('message', '')
	        # `or` also covers an explicit null/empty id, which would otherwise
	        # bypass the default and disable history storage.
	        conversation_id = data.get('conversation_id') or str(uuid.uuid4())

	        if not user_message:
	            return jsonify({'error': 'No message provided'}), 400

	        # Get response from Groq
	        response = chat_with_groq(user_message, conversation_id)

	        result = {
	            'response': response,
	            'conversation_id': conversation_id,
	        }

	        # Generate voice response; it is optional and omitted on failure.
	        audio_io = text_to_speech(response)
	        if audio_io:
	            result['voice_response'] = base64.b64encode(audio_io.getvalue()).decode('utf-8')

	        return jsonify(result)

	    except Exception as e:
	        return jsonify({'error': str(e)}), 500

	@app.route('/api/voice', methods=['POST'])
	def handle_voice():
	    """Handle a spoken chat turn.

	    Expects a multipart form with an ``audio`` file and, optionally,
	    ``conversation_id``. The audio is converted to WAV, transcribed, and the
	    transcript is sent to the chat model. Responds with JSON containing
	    ``text``, ``response``, ``conversation_id`` and, when speech synthesis
	    succeeded, ``voice_response`` (base64-encoded audio).

	    Returns 400 when no audio is supplied, when it cannot be transcribed, or
	    when any other error occurs.
	    """
	    wav_path = None
	    try:
	        if 'audio' not in request.files:
	            return jsonify({'error': 'No audio file provided'}), 400

	        audio_file = request.files['audio']
	        # `or` also replaces an empty id with a fresh one.
	        conversation_id = request.form.get('conversation_id') or str(uuid.uuid4())

	        # Process audio
	        wav_path = AudioProcessor().process_audio(audio_file)

	        # Perform speech recognition
	        voice_recognizer = sr.Recognizer()
	        with sr.AudioFile(wav_path) as source:
	            audio_data = voice_recognizer.record(source)

	        try:
	            text = voice_recognizer.recognize_google(audio_data)
	        except sr.UnknownValueError:
	            # This exception carries no message, so without this branch the
	            # client would receive an empty error string.
	            text = None

	        if not text:
	            return jsonify({'error': 'Could not transcribe audio'}), 400

	        # Get chatbot response
	        response = chat_with_groq(text, conversation_id)

	        result = {
	            'text': text,
	            'response': response,
	            'conversation_id': conversation_id,
	        }

	        # Generate voice response; it is optional and omitted on failure.
	        audio_io = text_to_speech(response)
	        if audio_io:
	            result['voice_response'] = base64.b64encode(audio_io.getvalue()).decode('utf-8')

	        return jsonify(result)

	    except Exception as e:
	        logger.exception("Error in handle_voice")
	        # NOTE(review): unexpected failures are reported as 400 to keep the
	        # existing client contract; /api/chat uses 500 for the same case.
	        return jsonify({'error': str(e)}), 400
	    finally:
	        # Remove the converted WAV file if it is still on disk.
	        if wav_path and os.path.exists(wav_path):
	            try:
	                os.remove(wav_path)
	            except OSError:
	                pass  # best-effort cleanup
	# Start the Flask development server only when this file is run as a script.
	# Host 0.0.0.0 listens on all network interfaces.
	# NOTE(review): 7860 is presumably the port the hosting platform expects — confirm.
	if __name__ == '__main__':
	    app.run(host='0.0.0.0', port=7860)