Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,15 +1,15 @@
|
|
1 |
-
from flask import Flask, request, jsonify
|
2 |
-
from dotenv import load_dotenv
|
3 |
-
from groq import Groq
|
4 |
-
import os
|
5 |
-
import uuid
|
6 |
-
from gtts import gTTS
|
7 |
-
import io
|
8 |
-
import base64
|
9 |
import speech_recognition as sr
|
|
|
|
|
10 |
import tempfile
|
11 |
-
import json
|
12 |
from pydub import AudioSegment
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
|
15 |
try:
|
@@ -31,6 +31,15 @@ MODEL = "llama3-70b-8192"
|
|
31 |
# Initialize speech recognition
|
32 |
recognizer = sr.Recognizer()
|
33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
# Store conversation history
|
35 |
conversations = {}
|
36 |
|
@@ -157,32 +166,47 @@ def chat():
|
|
157 |
def handle_voice():
|
158 |
try:
|
159 |
if 'audio' not in request.files:
|
|
|
160 |
return jsonify({'error': 'No audio file provided'}), 400
|
161 |
|
162 |
audio_file = request.files['audio']
|
163 |
conversation_id = request.form.get('conversation_id', str(uuid.uuid4()))
|
164 |
|
165 |
-
#
|
|
|
|
|
|
|
166 |
with tempfile.TemporaryDirectory() as temp_dir:
|
167 |
-
# Save
|
168 |
input_path = os.path.join(temp_dir, 'input.webm')
|
169 |
audio_file.save(input_path)
|
|
|
170 |
|
171 |
try:
|
172 |
-
# Convert
|
173 |
-
audio = AudioSegment.from_file(input_path
|
174 |
output_path = os.path.join(temp_dir, 'output.wav')
|
175 |
-
audio.export(output_path, format="wav"
|
|
|
|
|
176 |
|
177 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
178 |
with sr.AudioFile(output_path) as source:
|
179 |
-
audio_data = recognizer.record(source)
|
180 |
-
text = recognizer.recognize_google(audio_data)
|
|
|
181 |
|
182 |
if not text:
|
183 |
return jsonify({'error': 'Could not transcribe audio'}), 400
|
184 |
|
185 |
-
# Get response
|
186 |
response = chat_with_groq(text, conversation_id)
|
187 |
|
188 |
# Generate voice response
|
@@ -199,12 +223,18 @@ def handle_voice():
|
|
199 |
|
200 |
return jsonify(result)
|
201 |
|
|
|
|
|
|
|
|
|
|
|
|
|
202 |
except Exception as e:
|
203 |
-
|
204 |
return jsonify({'error': f'Error processing audio: {str(e)}'}), 400
|
205 |
|
206 |
except Exception as e:
|
207 |
-
|
208 |
-
return jsonify({'error': str(e)}),
|
209 |
if __name__ == '__main__':
|
210 |
app.run(host='0.0.0.0', port=7860)
|
|
|
1 |
+
from flask import Flask, request, jsonify
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
import speech_recognition as sr
|
3 |
+
import io
|
4 |
+
import os
|
5 |
import tempfile
|
|
|
6 |
from pydub import AudioSegment
|
7 |
+
import logging
|
8 |
+
|
9 |
+
# Set up logging
|
10 |
+
logging.basicConfig(level=logging.DEBUG)
|
11 |
+
logger = logging.getLogger(__name__)
|
12 |
+
|
13 |
|
14 |
|
15 |
try:
|
|
|
31 |
# Initialize speech recognition
|
32 |
recognizer = sr.Recognizer()
|
33 |
|
34 |
+
def init_speech_recognition():
    """Build a speech recognizer, or return None when construction fails.

    Returns:
        A new ``sr.Recognizer`` instance on success. If creating it raises
        any exception, the error is logged and ``None`` is returned instead,
        so callers must check the result before using it.
    """
    instance = None
    try:
        instance = sr.Recognizer()
    except Exception as exc:
        # Swallow the failure on purpose: the caller treats a falsy result
        # as "speech recognition unavailable".
        logger.error(f"Failed to initialize speech recognition: {exc}")
    return instance
|
42 |
+
|
43 |
# Store conversation history
|
44 |
conversations = {}
|
45 |
|
|
|
166 |
def handle_voice():
|
167 |
try:
|
168 |
if 'audio' not in request.files:
|
169 |
+
logger.error("No audio file in request")
|
170 |
return jsonify({'error': 'No audio file provided'}), 400
|
171 |
|
172 |
audio_file = request.files['audio']
|
173 |
conversation_id = request.form.get('conversation_id', str(uuid.uuid4()))
|
174 |
|
175 |
+
# Log incoming request details
|
176 |
+
logger.debug(f"Received audio file: {audio_file.filename}, "
|
177 |
+
f"Content type: {audio_file.content_type}")
|
178 |
+
|
179 |
with tempfile.TemporaryDirectory() as temp_dir:
|
180 |
+
# Save incoming audio
|
181 |
input_path = os.path.join(temp_dir, 'input.webm')
|
182 |
audio_file.save(input_path)
|
183 |
+
logger.debug(f"Saved audio file to: {input_path}")
|
184 |
|
185 |
try:
|
186 |
+
# Convert audio using pydub
|
187 |
+
audio = AudioSegment.from_file(input_path)
|
188 |
output_path = os.path.join(temp_dir, 'output.wav')
|
189 |
+
audio.export(output_path, format="wav",
|
190 |
+
parameters=["-ac", "1", "-ar", "16000"])
|
191 |
+
logger.debug("Audio conversion successful")
|
192 |
|
193 |
+
# Initialize recognition if not already done
|
194 |
+
if not hasattr(app, 'recognizer'):
|
195 |
+
app.recognizer = init_speech_recognition()
|
196 |
+
|
197 |
+
if not app.recognizer:
|
198 |
+
return jsonify({'error': 'Speech recognition unavailable'}), 503
|
199 |
+
|
200 |
+
# Perform speech recognition
|
201 |
with sr.AudioFile(output_path) as source:
|
202 |
+
audio_data = app.recognizer.record(source)
|
203 |
+
text = app.recognizer.recognize_google(audio_data)
|
204 |
+
logger.debug(f"Speech recognition result: {text}")
|
205 |
|
206 |
if not text:
|
207 |
return jsonify({'error': 'Could not transcribe audio'}), 400
|
208 |
|
209 |
+
# Get chatbot response
|
210 |
response = chat_with_groq(text, conversation_id)
|
211 |
|
212 |
# Generate voice response
|
|
|
223 |
|
224 |
return jsonify(result)
|
225 |
|
226 |
+
except sr.UnknownValueError:
|
227 |
+
logger.error("Speech recognition could not understand audio")
|
228 |
+
return jsonify({'error': 'Could not understand audio'}), 400
|
229 |
+
except sr.RequestError as e:
|
230 |
+
logger.error(f"Speech recognition service error: {e}")
|
231 |
+
return jsonify({'error': 'Speech recognition service error'}), 503
|
232 |
except Exception as e:
|
233 |
+
logger.error(f"Audio processing error: {e}")
|
234 |
return jsonify({'error': f'Error processing audio: {str(e)}'}), 400
|
235 |
|
236 |
except Exception as e:
|
237 |
+
logger.error(f"General error in handle_voice: {e}")
|
238 |
+
return jsonify({'error': str(e)}), 500
|
239 |
if __name__ == '__main__':
|
240 |
app.run(host='0.0.0.0', port=7860)
|