Spaces:
Running
Running
import sounddevice as sd | |
import scipy.io.wavfile as wav | |
import numpy as np | |
from pydub import AudioSegment | |
import io | |
import tempfile | |
import os | |
# Module-level logger; the root logger is configured to emit DEBUG and above.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.DEBUG)
class AudioProcessor:
    """Convert uploaded audio to WAV and record audio from an input device.

    The instance stores the target format (sample rate and channel count)
    that is applied both when converting uploads and when recording.
    """

    def __init__(self, sample_rate=16000, channels=1):
        """Store the target audio format.

        Args:
            sample_rate: Sample rate in Hz used for conversion and recording.
            channels: Channel count used for conversion and recording.
        """
        self.sample_rate = sample_rate
        self.channels = channels

    def process_audio(self, audio_file):
        """Convert an uploaded audio file to a WAV file in the target format.

        Args:
            audio_file: Upload object exposing ``save(path)``.

        Returns:
            Path of the converted WAV file. The file lives outside the
            scratch directory so it still exists after this method returns;
            the caller is responsible for deleting it when done.

        Raises:
            Exception: Whatever saving, decoding or exporting raises. The
                output file is removed before the error propagates.
        """
        # The result must outlive the scratch directory below, which is
        # deleted as soon as its ``with`` block exits.
        fd, output_path = tempfile.mkstemp(suffix='.wav')
        os.close(fd)
        try:
            with tempfile.TemporaryDirectory() as temp_dir:
                # Save incoming audio
                input_path = os.path.join(temp_dir, 'input.webm')
                audio_file.save(input_path)

                # Convert to the target channel count and sample rate
                audio = AudioSegment.from_file(input_path)
                audio = audio.set_channels(self.channels)
                audio = audio.set_frame_rate(self.sample_rate)

                # export() hands back the open output file; close it so the
                # data is flushed before the caller reads the path.
                exported = audio.export(output_path, format='wav')
                exported.close()
        except Exception:
            # Do not leave an orphaned output file behind on failure.
            try:
                os.unlink(output_path)
            except OSError:
                pass
            raise
        return output_path

    def record_audio(self, duration=5):
        """Record audio using sounddevice.

        Args:
            duration: Recording length in seconds.

        Returns:
            The recording returned by ``sd.rec`` once capture has finished.
        """
        frame_count = int(duration * self.sample_rate)
        recording = sd.rec(
            frame_count,
            samplerate=self.sample_rate,
            channels=self.channels,
        )
        # sd.rec() returns immediately; wait for the capture to complete.
        sd.wait()
        return recording
# PyAudio is treated as optional: a missing install is reported through the
# module logger and startup continues.
try:
    import pyaudio
except ImportError:
    logger.warning("PyAudio not available, speech functionality will be limited")
# Flask application object; its static folder is 'static'.
app = Flask(__name__, static_folder='static')

# load_dotenv() runs before the API key is read from the environment.
load_dotenv()

# Groq client and the model name used for chat completions.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)
MODEL = "llama3-70b-8192"

# Module-level speech recognizer instance.
recognizer = sr.Recognizer()
def init_speech_recognition():
    """Build a speech recognizer, returning None when construction fails.

    Any exception raised while creating the recognizer is logged as an
    error instead of being propagated.
    """
    try:
        return sr.Recognizer()
    except Exception as exc:
        logger.error(f"Failed to initialize speech recognition: {exc}")
    return None
# In-memory conversation histories, keyed by conversation id; starts empty.
conversations = dict()
def load_base_prompt(path="base_prompt.txt"):
    """Read the system prompt from a text file.

    Args:
        path: Location of the prompt file. Defaults to ``base_prompt.txt``,
            resolved against the current working directory.

    Returns:
        The file contents with surrounding whitespace stripped, or a generic
        language-learning prompt when the file does not exist.
    """
    try:
        # Decode explicitly as UTF-8 so the prompt reads the same on every
        # platform instead of depending on the locale's default encoding.
        with open(path, "r", encoding="utf-8") as file:
            return file.read().strip()
    except FileNotFoundError:
        print(f"Error: {path} file not found.")
        return "You are a helpful assistant for language learning."


# Load the base prompt
base_prompt = load_base_prompt()
def chat_with_groq(user_message, conversation_id=None):
    """Send a user message to the Groq chat model and return its reply.

    Args:
        user_message: Text of the user's turn.
        conversation_id: Key into ``conversations``. When truthy, the updated
            history is stored under it after a successful reply; when falsy,
            the exchange is not remembered.

    Returns:
        The assistant's reply with surrounding whitespace stripped, or an
        apology string containing the error text if anything fails.
    """
    try:
        # Work on a copy so a failed request does not leave a dangling user
        # turn (with no assistant reply) in the stored history.
        history = conversations.get(conversation_id)
        if history:
            messages = list(history)
        else:
            messages = [{"role": "system", "content": base_prompt}]

        # Add user message
        messages.append({"role": "user", "content": user_message})

        # Get completion from Groq
        completion = client.chat.completions.create(
            model=MODEL,
            messages=messages,
            temperature=0.1,
            max_tokens=1024,
        )

        # Add assistant's response to history
        assistant_message = completion.choices[0].message.content.strip()
        messages.append({"role": "assistant", "content": assistant_message})

        # Commit the new history only after the whole exchange succeeded
        if conversation_id:
            conversations[conversation_id] = messages
        return assistant_message
    except Exception as e:
        logger.exception("Error in chat_with_groq")
        return f"I apologize, but I'm having trouble responding right now. Error: {str(e)}"
def text_to_speech(text):
    """Synthesize English speech for ``text`` with gTTS.

    Returns:
        An ``io.BytesIO`` holding the generated audio, rewound to the start,
        or ``None`` if synthesis raised (the error is printed).
    """
    try:
        speech = gTTS(text=text, lang='en')
        buffer = io.BytesIO()
        speech.write_to_fp(buffer)
        # Rewind so callers read from the beginning of the audio data.
        buffer.seek(0)
    except Exception as exc:
        print(f"Error in text_to_speech: {str(exc)}")
        return None
    return buffer
def speech_to_text(audio_file):
    """Transcribe an uploaded audio file with the module-level recognizer.

    Args:
        audio_file: Upload object exposing ``save(path)``.

    Returns:
        The recognized text; the string ``"Could not understand audio"`` when
        the speech is unintelligible; a ``"Could not request results; ..."``
        string when the recognition request fails; or ``None`` on any other
        error.
    """
    temp_path = None
    try:
        # Reserve a temp file name and close the handle before writing to
        # the path, so the upload is not saved into a file we still hold open.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
            temp_path = temp_audio.name
        audio_file.save(temp_path)

        # Use SpeechRecognition to convert speech to text
        with sr.AudioFile(temp_path) as source:
            # Adjust recognition settings
            recognizer.dynamic_energy_threshold = True
            recognizer.energy_threshold = 4000
            # Record the entire audio file
            audio = recognizer.record(source)

        return recognizer.recognize_google(audio, language='en-US')
    except sr.UnknownValueError:
        return "Could not understand audio"
    except sr.RequestError as e:
        return f"Could not request results; {str(e)}"
    except Exception:
        logger.exception("Error in speech_to_text")
        return None
    finally:
        # Clean up the temporary file if one was created. Removal is
        # best-effort: a failure here must not mask the real result.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                logger.debug("Could not remove temporary file %s", temp_path)
def index():
    """Render and return the ``index.html`` template."""
    # NOTE(review): no route decorator is visible on this view here; confirm
    # it is registered with the Flask app.
    page = render_template('index.html')
    return page
def chat():
    """Handle a text chat request and reply with text plus optional audio.

    Reads ``message`` and an optional ``conversation_id`` from the JSON
    request body.

    Returns:
        A JSON response with ``response`` and ``conversation_id`` (plus
        base64 ``voice_response`` when speech synthesis succeeded); a 400
        JSON error when no message was supplied; a 500 JSON error on any
        unexpected failure.
    """
    try:
        # silent=True yields None instead of raising on a missing/malformed
        # JSON body, so bad input ends up as a 400 below rather than a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        user_message = data.get('message', '')
        # A null/empty id would disable history, so generate a fresh one.
        conversation_id = data.get('conversation_id') or str(uuid.uuid4())

        if not user_message:
            return jsonify({'error': 'No message provided'}), 400

        # Get response from Groq
        response = chat_with_groq(user_message, conversation_id)

        # Generate voice response
        audio_io = text_to_speech(response)

        result = {
            'response': response,
            'conversation_id': conversation_id,
        }
        if audio_io:
            audio_base64 = base64.b64encode(audio_io.getvalue()).decode('utf-8')
            result['voice_response'] = audio_base64
        return jsonify(result)
    except Exception as e:
        logger.exception("Error in chat")
        return jsonify({'error': str(e)}), 500
def handle_voice():
    """Handle a voice request: transcribe, chat, and reply with text and audio.

    Expects an ``audio`` file upload and an optional ``conversation_id``
    form field.

    Returns:
        A JSON response with the transcribed ``text``, the chatbot
        ``response`` and ``conversation_id`` (plus base64 ``voice_response``
        when speech synthesis succeeded); a 400 JSON error when no audio was
        supplied, the audio could not be transcribed, or processing failed.
    """
    wav_path = None
    try:
        if 'audio' not in request.files:
            return jsonify({'error': 'No audio file provided'}), 400

        audio_file = request.files['audio']
        # An empty id would disable history, so generate a fresh one.
        conversation_id = request.form.get('conversation_id') or str(uuid.uuid4())

        # Process audio
        audio_processor = AudioProcessor()
        wav_path = audio_processor.process_audio(audio_file)

        # Perform speech recognition with a fresh recognizer
        voice_recognizer = sr.Recognizer()
        with sr.AudioFile(wav_path) as source:
            audio_data = voice_recognizer.record(source)
        try:
            text = voice_recognizer.recognize_google(audio_data)
        except sr.UnknownValueError:
            # Unintelligible speech is reported with the explicit message
            # below instead of an empty error string.
            text = ''

        if not text:
            return jsonify({'error': 'Could not transcribe audio'}), 400

        # Get chatbot response
        response = chat_with_groq(text, conversation_id)

        # Generate voice response
        audio_io = text_to_speech(response)

        result = {
            'text': text,
            'response': response,
            'conversation_id': conversation_id,
        }
        if audio_io:
            audio_base64 = base64.b64encode(audio_io.getvalue()).decode('utf-8')
            result['voice_response'] = audio_base64
        return jsonify(result)
    except Exception as e:
        logger.exception("Error in handle_voice")
        return jsonify({'error': str(e)}), 400
    finally:
        # Best-effort removal of the converted WAV file; it may already be
        # gone, and a cleanup failure must not replace the response.
        if wav_path:
            try:
                os.unlink(wav_path)
            except OSError:
                pass
if __name__ == '__main__':
    # Run the Flask app on all interfaces, port 7860, when executed as a script.
    app.run(port=7860, host='0.0.0.0')