anshharora committed on
Commit
6600d84
·
verified ·
1 Parent(s): e49447a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -72
app.py CHANGED
@@ -1,11 +1,15 @@
1
- import sounddevice as sd
2
- import scipy.io.wavfile as wav
3
- import numpy as np
4
- from pydub import AudioSegment
 
 
5
  import io
6
  import tempfile
7
- import os
8
-
 
 
9
 
10
  # Set up logging
11
  logging.basicConfig(level=logging.DEBUG)
@@ -18,39 +22,24 @@ class AudioProcessor:
18
 
19
  def process_audio(self, audio_file):
20
  """Process incoming audio file and convert to proper format"""
21
- with tempfile.TemporaryDirectory() as temp_dir:
22
- # Save incoming audio
23
- input_path = os.path.join(temp_dir, 'input.webm')
24
- audio_file.save(input_path)
25
-
26
- # Convert to WAV using pydub
27
- audio = AudioSegment.from_file(input_path)
28
- audio = audio.set_channels(self.channels)
29
- audio = audio.set_frame_rate(self.sample_rate)
30
-
31
- output_path = os.path.join(temp_dir, 'output.wav')
32
- audio.export(output_path, format='wav')
33
-
34
- # Read the processed audio file
35
- return output_path
36
-
37
- def record_audio(self, duration=5):
38
- """Record audio using sounddevice"""
39
- recording = sd.rec(
40
- int(duration * self.sample_rate),
41
- samplerate=self.sample_rate,
42
- channels=self.channels
43
- )
44
- sd.wait()
45
- return recording
46
-
47
-
48
-
49
-
50
- try:
51
- import pyaudio
52
- except ImportError:
53
- print("Warning: PyAudio not available, speech functionality will be limited")
54
 
55
  # Initialize Flask app
56
  app = Flask(__name__, static_folder='static')
@@ -66,15 +55,6 @@ MODEL = "llama3-70b-8192"
66
  # Initialize speech recognition
67
  recognizer = sr.Recognizer()
68
 
69
- def init_speech_recognition():
70
- """Initialize speech recognition with fallback options"""
71
- try:
72
- recognizer = sr.Recognizer()
73
- return recognizer
74
- except Exception as e:
75
- logger.error(f"Failed to initialize speech recognition: {e}")
76
- return None
77
-
78
  # Store conversation history
79
  conversations = {}
80
 
@@ -83,7 +63,7 @@ def load_base_prompt():
83
  with open("base_prompt.txt", "r") as file:
84
  return file.read().strip()
85
  except FileNotFoundError:
86
- print("Error: base_prompt.txt file not found.")
87
  return "You are a helpful assistant for language learning."
88
 
89
  # Load the base prompt
@@ -117,7 +97,7 @@ def chat_with_groq(user_message, conversation_id=None):
117
 
118
  return assistant_message
119
  except Exception as e:
120
- print(f"Error in chat_with_groq: {str(e)}")
121
  return f"I apologize, but I'm having trouble responding right now. Error: {str(e)}"
122
 
123
  def text_to_speech(text):
@@ -128,17 +108,12 @@ def text_to_speech(text):
128
  audio_io.seek(0)
129
  return audio_io
130
  except Exception as e:
131
- print(f"Error in text_to_speech: {str(e)}")
132
  return None
133
 
134
- def speech_to_text(audio_file):
135
  try:
136
- # Save the uploaded audio to a temporary file
137
- with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
138
- audio_file.save(temp_audio.name)
139
-
140
- # Use SpeechRecognition to convert speech to text
141
- with sr.AudioFile(temp_audio.name) as source:
142
  # Adjust recognition settings
143
  recognizer.dynamic_energy_threshold = True
144
  recognizer.energy_threshold = 4000
@@ -146,23 +121,18 @@ def speech_to_text(audio_file):
146
  # Record the entire audio file
147
  audio = recognizer.record(source)
148
 
149
- # Perform recognition with increased timeout
150
  text = recognizer.recognize_google(audio, language='en-US')
151
  return text
152
 
153
  except sr.UnknownValueError:
154
  return "Could not understand audio"
155
  except sr.RequestError as e:
 
156
  return f"Could not request results; {str(e)}"
157
  except Exception as e:
158
- print(f"Error in speech_to_text: {str(e)}")
159
  return None
160
- finally:
161
- # Clean up temporary file
162
- try:
163
- os.unlink(temp_audio.name)
164
- except:
165
- pass
166
 
167
  @app.route('/')
168
  def index():
@@ -195,6 +165,7 @@ def chat():
195
  return jsonify(result)
196
 
197
  except Exception as e:
 
198
  return jsonify({'error': str(e)}), 500
199
 
200
  @app.route('/api/voice', methods=['POST'])
@@ -211,10 +182,7 @@ def handle_voice():
211
  wav_path = audio_processor.process_audio(audio_file)
212
 
213
  # Perform speech recognition
214
- recognizer = sr.Recognizer()
215
- with sr.AudioFile(wav_path) as source:
216
- audio_data = recognizer.record(source)
217
- text = recognizer.recognize_google(audio_data)
218
 
219
  if not text:
220
  return jsonify({'error': 'Could not transcribe audio'}), 400
@@ -237,7 +205,8 @@ def handle_voice():
237
  return jsonify(result)
238
 
239
  except Exception as e:
240
- print(f"Error in handle_voice: {str(e)}")
241
  return jsonify({'error': str(e)}), 400
 
242
  if __name__ == '__main__':
243
- app.run(host='0.0.0.0', port=7860)
 
1
+ from flask import Flask, request, jsonify, render_template
2
+ import os
3
+ import uuid
4
+ import base64
5
+ import logging
6
+ from dotenv import load_dotenv
7
  import io
8
  import tempfile
9
+ from gtts import gTTS
10
+ from groq import Groq
11
+ import speech_recognition as sr
12
+ from pydub import AudioSegment
13
 
14
  # Set up logging
15
  logging.basicConfig(level=logging.DEBUG)
 
22
 
23
  def process_audio(self, audio_file):
24
  """Process incoming audio file and convert to proper format"""
25
+ try:
26
+ with tempfile.TemporaryDirectory() as temp_dir:
27
+ # Save incoming audio
28
+ input_path = os.path.join(temp_dir, 'input.webm')
29
+ audio_file.save(input_path)
30
+
31
+ # Convert to WAV using pydub
32
+ audio = AudioSegment.from_file(input_path)
33
+ audio = audio.set_channels(self.channels)
34
+ audio = audio.set_frame_rate(self.sample_rate)
35
+
36
+ output_path = os.path.join(temp_dir, 'output.wav')
37
+ audio.export(output_path, format='wav')
38
+
39
+ return output_path
40
+ except Exception as e:
41
+ logger.error(f"Error processing audio: {e}")
42
+ raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  # Initialize Flask app
45
  app = Flask(__name__, static_folder='static')
 
55
  # Initialize speech recognition
56
  recognizer = sr.Recognizer()
57
 
 
 
 
 
 
 
 
 
 
58
  # Store conversation history
59
  conversations = {}
60
 
 
63
  with open("base_prompt.txt", "r") as file:
64
  return file.read().strip()
65
  except FileNotFoundError:
66
+ logger.warning("base_prompt.txt not found, using default prompt")
67
  return "You are a helpful assistant for language learning."
68
 
69
  # Load the base prompt
 
97
 
98
  return assistant_message
99
  except Exception as e:
100
+ logger.error(f"Error in chat_with_groq: {e}")
101
  return f"I apologize, but I'm having trouble responding right now. Error: {str(e)}"
102
 
103
  def text_to_speech(text):
 
108
  audio_io.seek(0)
109
  return audio_io
110
  except Exception as e:
111
+ logger.error(f"Error in text_to_speech: {e}")
112
  return None
113
 
114
+ def speech_to_text(audio_path):
115
  try:
116
+ with sr.AudioFile(audio_path) as source:
 
 
 
 
 
117
  # Adjust recognition settings
118
  recognizer.dynamic_energy_threshold = True
119
  recognizer.energy_threshold = 4000
 
121
  # Record the entire audio file
122
  audio = recognizer.record(source)
123
 
124
+ # Perform recognition
125
  text = recognizer.recognize_google(audio, language='en-US')
126
  return text
127
 
128
  except sr.UnknownValueError:
129
  return "Could not understand audio"
130
  except sr.RequestError as e:
131
+ logger.error(f"Speech recognition request error: {e}")
132
  return f"Could not request results; {str(e)}"
133
  except Exception as e:
134
+ logger.error(f"Error in speech_to_text: {e}")
135
  return None
 
 
 
 
 
 
136
 
137
  @app.route('/')
138
  def index():
 
165
  return jsonify(result)
166
 
167
  except Exception as e:
168
+ logger.error(f"Error in chat endpoint: {e}")
169
  return jsonify({'error': str(e)}), 500
170
 
171
  @app.route('/api/voice', methods=['POST'])
 
182
  wav_path = audio_processor.process_audio(audio_file)
183
 
184
  # Perform speech recognition
185
+ text = speech_to_text(wav_path)
 
 
 
186
 
187
  if not text:
188
  return jsonify({'error': 'Could not transcribe audio'}), 400
 
205
  return jsonify(result)
206
 
207
  except Exception as e:
208
+ logger.error(f"Error in handle_voice: {e}")
209
  return jsonify({'error': str(e)}), 400
210
+
211
  if __name__ == '__main__':
212
+ app.run(host='0.0.0.0', port=7860)