# Spaces: Running
import base64
import contextlib
import io
import logging
import os
import tempfile
import uuid

import speech_recognition as sr
from dotenv import load_dotenv
from flask import Flask, jsonify, render_template, request
from groq import Groq
from gtts import gTTS
from pydub import AudioSegment
# Configure logging at DEBUG verbosity and create the logger used by this module.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
class AudioProcessor:
    """Convert an uploaded audio file into a WAV file with fixed parameters.

    Args:
        sample_rate: Frame rate, in Hz, applied to the converted audio.
        channels: Channel count applied to the converted audio.
    """

    def __init__(self, sample_rate=16000, channels=1):
        self.sample_rate = sample_rate
        self.channels = channels

    def process_audio(self, audio_file):
        """Save *audio_file* to disk and convert it to WAV.

        Args:
            audio_file: Object exposing ``save(path)``; the data is written
                to a scratch file named ``input.webm`` before conversion.

        Returns:
            Path of the converted WAV file. The file lives outside the
            scratch directory so it still exists after this method returns;
            the caller is responsible for deleting it.

        Raises:
            Exception: Any error raised while saving or converting is logged
                and re-raised unchanged.
        """
        output_path = None
        try:
            # The result must outlive the scratch directory below, which is
            # removed (with everything in it) as soon as its `with` exits.
            fd, output_path = tempfile.mkstemp(suffix='.wav')
            os.close(fd)

            with tempfile.TemporaryDirectory() as temp_dir:
                # Save incoming audio
                input_path = os.path.join(temp_dir, 'input.webm')
                audio_file.save(input_path)

                # Convert to WAV using pydub
                audio = AudioSegment.from_file(input_path)
                audio = audio.set_channels(self.channels)
                audio = audio.set_frame_rate(self.sample_rate)

                # export() hands back the open output handle; close it so the
                # data is flushed and the file can be removed later.
                audio.export(output_path, format='wav').close()

            return output_path
        except Exception as e:
            logger.error(f"Error processing audio: {e}")
            # Do not leave a half-written result behind on failure.
            if output_path and os.path.exists(output_path):
                os.remove(output_path)
            raise
# Flask application object; its static folder is set to 'static'.
app = Flask(__name__, static_folder='static')

# Load settings via python-dotenv before reading them from the environment.
load_dotenv()

# Groq client and the chat model requested from it.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
MODEL = "llama3-70b-8192"
client = Groq(api_key=GROQ_API_KEY)

# Single recognizer instance shared by all speech-to-text calls.
recognizer = sr.Recognizer()

# In-memory conversation histories, keyed by conversation id.
conversations = {}
def load_base_prompt():
    """Return the system prompt read from ``base_prompt.txt``.

    The file is opened by relative path and decoded as UTF-8, so a prompt
    containing non-ASCII text is read the same way regardless of the
    platform's default encoding.

    Returns:
        The file contents with surrounding whitespace stripped, or a
        built-in default prompt when the file does not exist.
    """
    try:
        with open("base_prompt.txt", encoding="utf-8") as prompt_file:
            return prompt_file.read().strip()
    except FileNotFoundError:
        logger.warning("base_prompt.txt not found, using default prompt")
        return "You are a helpful assistant for language learning."
# System prompt text, read once when the module is imported.
base_prompt: str = load_base_prompt()
def chat_with_groq(user_message, conversation_id=None):
    """Send *user_message* to the Groq chat model and return its reply.

    Args:
        user_message: Text of the user's turn.
        conversation_id: Key into ``conversations``. When truthy, the updated
            history is stored under it after a successful reply; when falsy,
            the exchange is not remembered.

    Returns:
        The assistant's reply with surrounding whitespace stripped. If
        anything fails, an apology string containing the error text is
        returned instead of raising.
    """
    try:
        # Work on a copy: a failed request must not leave a dangling user
        # turn (with no assistant reply) in the stored history.
        history = list(conversations.get(conversation_id, []))
        if not history:
            # A new conversation starts with the system prompt.
            history.append({"role": "system", "content": base_prompt})

        history.append({"role": "user", "content": user_message})

        completion = client.chat.completions.create(
            model=MODEL,
            messages=history,
            temperature=0.1,
            max_tokens=1024,
        )

        assistant_message = completion.choices[0].message.content.strip()
        history.append({"role": "assistant", "content": assistant_message})

        # Commit the history only once the whole exchange has succeeded.
        if conversation_id:
            conversations[conversation_id] = history

        return assistant_message
    except Exception as e:
        logger.error(f"Error in chat_with_groq: {e}")
        return f"I apologize, but I'm having trouble responding right now. Error: {str(e)}"
def text_to_speech(text):
    """Synthesize English speech for *text* with gTTS.

    Returns:
        An ``io.BytesIO`` holding the audio written by gTTS, rewound to the
        start, or ``None`` if synthesis fails (the error is logged).
    """
    try:
        speech = gTTS(text=text, lang='en')
        buffer = io.BytesIO()
        speech.write_to_fp(buffer)
    except Exception as e:
        logger.error(f"Error in text_to_speech: {e}")
        return None

    # Rewind so readers start from the first byte.
    buffer.seek(0)
    return buffer
def speech_to_text(audio_path):
    """Transcribe the audio file at *audio_path* as US English.

    Returns:
        The recognized text on success. On failure the result depends on the
        error: the string ``"Could not understand audio"`` for
        ``sr.UnknownValueError``, a ``"Could not request results; ..."``
        string for ``sr.RequestError``, and ``None`` for anything else.

    NOTE(review): the two failure strings are non-empty, so a caller that
    only checks truthiness will treat them as a transcription.
    """
    try:
        with sr.AudioFile(audio_path) as source:
            # Recognizer tuning, applied on every call.
            recognizer.dynamic_energy_threshold = True
            recognizer.energy_threshold = 4000

            # Read the whole file, then hand it to the Google recognizer.
            captured = recognizer.record(source)
            return recognizer.recognize_google(captured, language='en-US')
    except sr.UnknownValueError:
        return "Could not understand audio"
    except sr.RequestError as e:
        logger.error(f"Speech recognition request error: {e}")
        return f"Could not request results; {str(e)}"
    except Exception as e:
        logger.error(f"Error in speech_to_text: {e}")
        return None
def index():
    """Render and return the ``index.html`` template."""
    # NOTE(review): no route decorator is visible on this handler here;
    # confirm it is registered on `app`.
    page = render_template('index.html')
    return page
def chat():
    """Handle a text chat request and reply with text plus optional audio.

    Reads a JSON object from the request body with a ``message`` field and
    an optional ``conversation_id`` field.

    Returns:
        A JSON response with ``response`` and ``conversation_id``, plus
        ``voice_response`` (base64-encoded audio) when speech synthesis
        succeeded. Responds 400 when no message is supplied (including when
        the body is not a JSON object) and 500 with the error text on an
        unexpected failure.

    NOTE(review): no route decorator is visible on this handler here;
    confirm it is registered on `app`.
    """
    try:
        # silent=True makes a missing or malformed JSON body yield None
        # rather than raising, so bad client input ends in the 400 below
        # instead of being reported as a server error.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        user_message = data.get('message', '')
        if not user_message:
            return jsonify({'error': 'No message provided'}), 400

        # `or` also covers an explicit null/empty id, which a plain
        # .get(key, default) would pass through and which is never stored.
        conversation_id = data.get('conversation_id') or str(uuid.uuid4())

        # Get response from Groq
        response = chat_with_groq(user_message, conversation_id)

        # Generate voice response
        audio_io = text_to_speech(response)

        result = {
            'response': response,
            'conversation_id': conversation_id,
        }
        if audio_io:
            result['voice_response'] = base64.b64encode(audio_io.getvalue()).decode('utf-8')

        return jsonify(result)
    except Exception as e:
        logger.error(f"Error in chat endpoint: {e}")
        return jsonify({'error': str(e)}), 500
def handle_voice():
    """Handle a voice request: transcribe it, get a reply, and voice the reply.

    Reads the uploaded file from the ``audio`` field of ``request.files``
    and an optional ``conversation_id`` from the form data.

    Returns:
        A JSON response with ``text`` (the transcription), ``response`` and
        ``conversation_id``, plus ``voice_response`` (base64-encoded audio)
        when speech synthesis succeeded. Responds 400 when no audio file is
        supplied or transcription yields nothing, and 500 with the error
        text on an unexpected failure.

    NOTE(review): no route decorator is visible on this handler here;
    confirm it is registered on `app`.
    """
    wav_path = None
    try:
        if 'audio' not in request.files:
            return jsonify({'error': 'No audio file provided'}), 400

        audio_file = request.files['audio']
        # `or` also covers an empty form field, which a plain
        # .get(key, default) would pass through and which is never stored.
        conversation_id = request.form.get('conversation_id') or str(uuid.uuid4())

        # Process audio
        wav_path = AudioProcessor().process_audio(audio_file)

        # Perform speech recognition
        text = speech_to_text(wav_path)
        if not text:
            return jsonify({'error': 'Could not transcribe audio'}), 400

        # Get chatbot response
        response = chat_with_groq(text, conversation_id)

        # Generate voice response
        audio_io = text_to_speech(response)

        result = {
            'text': text,
            'response': response,
            'conversation_id': conversation_id,
        }
        if audio_io:
            result['voice_response'] = base64.b64encode(audio_io.getvalue()).decode('utf-8')

        return jsonify(result)
    except Exception as e:
        logger.error(f"Error in handle_voice: {e}")
        # Unexpected failures are server-side errors, as in the chat handler.
        return jsonify({'error': str(e)}), 500
    finally:
        # Best-effort removal of the converted audio; it may already be gone.
        if wav_path:
            with contextlib.suppress(OSError):
                os.remove(wav_path)
if __name__ == '__main__':
    # When executed as a script, serve on all network interfaces at port 7860.
    app.run(port=7860, host='0.0.0.0')