anshharora committed
Commit 777758b · verified · 1 Parent(s): 1aefd52

Update app.py

Files changed (1)
  1. app.py +213 -190
app.py CHANGED
@@ -1,190 +1,213 @@
- from flask import Flask, request, jsonify, render_template
- from dotenv import load_dotenv
- from groq import Groq
- import os
- import uuid
- from gtts import gTTS
- import io
- import base64
- import speech_recognition as sr
- import tempfile
- import json
-
- try:
-     import pyaudio
- except ImportError:
-     print("Warning: PyAudio not available, speech functionality will be limited")
-
- # Initialize Flask app
- app = Flask(__name__, static_folder='static')
-
- # Load environment variables
- load_dotenv()
-
- # Groq API Configuration
- GROQ_API_KEY = os.getenv("GROQ_API_KEY")
- client = Groq(api_key=GROQ_API_KEY)
- MODEL = "llama3-70b-8192"
-
- # Initialize speech recognition
- recognizer = sr.Recognizer()
-
- # Store conversation history
- conversations = {}
-
- def load_base_prompt():
-     try:
-         with open("base_prompt.txt", "r") as file:
-             return file.read().strip()
-     except FileNotFoundError:
-         print("Error: base_prompt.txt file not found.")
-         return "You are a helpful assistant for language learning."
-
- # Load the base prompt
- base_prompt = load_base_prompt()
-
- def chat_with_groq(user_message, conversation_id=None):
-     try:
-         # Get conversation history or create new
-         messages = conversations.get(conversation_id, [])
-         if not messages:
-             messages.append({"role": "system", "content": base_prompt})
-
-         # Add user message
-         messages.append({"role": "user", "content": user_message})
-
-         # Get completion from Groq
-         completion = client.chat.completions.create(
-             model=MODEL,
-             messages=messages,
-             temperature=0.1,
-             max_tokens=1024
-         )
-
-         # Add assistant's response to history
-         assistant_message = completion.choices[0].message.content.strip()
-         messages.append({"role": "assistant", "content": assistant_message})
-
-         # Update conversation history
-         if conversation_id:
-             conversations[conversation_id] = messages
-
-         return assistant_message
-     except Exception as e:
-         print(f"Error in chat_with_groq: {str(e)}")
-         return f"I apologize, but I'm having trouble responding right now. Error: {str(e)}"
-
- def text_to_speech(text):
-     try:
-         tts = gTTS(text=text, lang='en')
-         audio_io = io.BytesIO()
-         tts.write_to_fp(audio_io)
-         audio_io.seek(0)
-         return audio_io
-     except Exception as e:
-         print(f"Error in text_to_speech: {str(e)}")
-         return None
-
- def speech_to_text(audio_file):
-     try:
-         # Save the uploaded audio to a temporary file
-         with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
-             audio_file.save(temp_audio.name)
-
-         # Use SpeechRecognition to convert speech to text
-         with sr.AudioFile(temp_audio.name) as source:
-             # Adjust recognition settings
-             recognizer.dynamic_energy_threshold = True
-             recognizer.energy_threshold = 4000
-
-             # Record the entire audio file
-             audio = recognizer.record(source)
-
-             # Perform recognition with increased timeout
-             text = recognizer.recognize_google(audio, language='en-US')
-             return text
-
-     except sr.UnknownValueError:
-         return "Could not understand audio"
-     except sr.RequestError as e:
-         return f"Could not request results; {str(e)}"
-     except Exception as e:
-         print(f"Error in speech_to_text: {str(e)}")
-         return None
-     finally:
-         # Clean up temporary file
-         try:
-             os.unlink(temp_audio.name)
-         except:
-             pass
-
- @app.route('/')
- def index():
-     return render_template('index.html')
-
- @app.route('/api/chat', methods=['POST'])
- def chat():
-     try:
-         data = request.get_json()
-         user_message = data.get('message', '')
-         conversation_id = data.get('conversation_id', str(uuid.uuid4()))
-
-         if not user_message:
-             return jsonify({'error': 'No message provided'}), 400
-
-         # Get response from Groq
-         response = chat_with_groq(user_message, conversation_id)
-
-         # Generate voice response
-         audio_io = text_to_speech(response)
-         result = {
-             'response': response,
-             'conversation_id': conversation_id
-         }
-
-         if audio_io:
-             audio_base64 = base64.b64encode(audio_io.getvalue()).decode('utf-8')
-             result['voice_response'] = audio_base64
-
-         return jsonify(result)
-
-     except Exception as e:
-         return jsonify({'error': str(e)}), 500
-
- @app.route('/api/voice', methods=['POST'])
- def handle_voice():
-     try:
-         if 'audio' not in request.files:
-             return jsonify({'error': 'No audio file provided'}), 400
-
-         audio_file = request.files['audio']
-         conversation_id = request.form.get('conversation_id', str(uuid.uuid4()))
-
-         # Convert speech to text
-         text = speech_to_text(audio_file)
-
-         if not text:
-             return jsonify({'error': 'Could not transcribe audio'}), 400
-
-         # Get response from Groq
-         response = chat_with_groq(text, conversation_id)
-
-         # Generate voice response
-         audio_io = text_to_speech(response)
-         result = {
-             'text': text,
-             'response': response,
-             'conversation_id': conversation_id
-         }
-
-         if audio_io:
-             audio_base64 = base64.b64encode(audio_io.getvalue()).decode('utf-8')
-             result['voice_response'] = audio_base64
-
-         return jsonify(result)
-
-     except Exception as e:
-         return jsonify({'error': str(e)}), 500
-
- if __name__ == '__main__':
-     app.run(host='0.0.0.0', port=7860)
+ from flask import Flask, request, jsonify, render_template
+ from dotenv import load_dotenv
+ from groq import Groq
+ import os
+ import uuid
+ from gtts import gTTS
+ import io
+ import base64
+ import speech_recognition as sr
+ import tempfile
+ import json
+
+ try:
+     import pyaudio
+ except ImportError:
+     print("Warning: PyAudio not available, speech functionality will be limited")
+
+ # Initialize Flask app
+ app = Flask(__name__, static_folder='static')
+
+ # Load environment variables
+ load_dotenv()
+
+ # Groq API Configuration
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+ client = Groq(api_key=GROQ_API_KEY)
+ MODEL = "llama3-70b-8192"
+
+ # Initialize speech recognition
+ recognizer = sr.Recognizer()
+
+ # Store conversation history
+ conversations = {}
+
+ def load_base_prompt():
+     try:
+         with open("base_prompt.txt", "r") as file:
+             return file.read().strip()
+     except FileNotFoundError:
+         print("Error: base_prompt.txt file not found.")
+         return "You are a helpful assistant for language learning."
+
+ # Load the base prompt
+ base_prompt = load_base_prompt()
+
+ def chat_with_groq(user_message, conversation_id=None):
+     try:
+         # Get conversation history or create new
+         messages = conversations.get(conversation_id, [])
+         if not messages:
+             messages.append({"role": "system", "content": base_prompt})
+
+         # Add user message
+         messages.append({"role": "user", "content": user_message})
+
+         # Get completion from Groq
+         completion = client.chat.completions.create(
+             model=MODEL,
+             messages=messages,
+             temperature=0.1,
+             max_tokens=1024
+         )
+
+         # Add assistant's response to history
+         assistant_message = completion.choices[0].message.content.strip()
+         messages.append({"role": "assistant", "content": assistant_message})
+
+         # Update conversation history
+         if conversation_id:
+             conversations[conversation_id] = messages
+
+         return assistant_message
+     except Exception as e:
+         print(f"Error in chat_with_groq: {str(e)}")
+         return f"I apologize, but I'm having trouble responding right now. Error: {str(e)}"
+
+ def text_to_speech(text):
+     try:
+         tts = gTTS(text=text, lang='en')
+         audio_io = io.BytesIO()
+         tts.write_to_fp(audio_io)
+         audio_io.seek(0)
+         return audio_io
+     except Exception as e:
+         print(f"Error in text_to_speech: {str(e)}")
+         return None
+
+ def speech_to_text(audio_file):
+     try:
+         # Save the uploaded audio to a temporary file
+         with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
+             audio_file.save(temp_audio.name)
+
+         # Use SpeechRecognition to convert speech to text
+         with sr.AudioFile(temp_audio.name) as source:
+             # Adjust recognition settings
+             recognizer.dynamic_energy_threshold = True
+             recognizer.energy_threshold = 4000
+
+             # Record the entire audio file
+             audio = recognizer.record(source)
+
+             # Perform recognition with increased timeout
+             text = recognizer.recognize_google(audio, language='en-US')
+             return text
+
+     except sr.UnknownValueError:
+         return "Could not understand audio"
+     except sr.RequestError as e:
+         return f"Could not request results; {str(e)}"
+     except Exception as e:
+         print(f"Error in speech_to_text: {str(e)}")
+         return None
+     finally:
+         # Clean up temporary file
+         try:
+             os.unlink(temp_audio.name)
+         except:
+             pass
+
+ @app.route('/')
+ def index():
+     return render_template('index.html')
+
+ @app.route('/api/chat', methods=['POST'])
+ def chat():
+     try:
+         data = request.get_json()
+         user_message = data.get('message', '')
+         conversation_id = data.get('conversation_id', str(uuid.uuid4()))
+
+         if not user_message:
+             return jsonify({'error': 'No message provided'}), 400
+
+         # Get response from Groq
+         response = chat_with_groq(user_message, conversation_id)
+
+         # Generate voice response
+         audio_io = text_to_speech(response)
+         result = {
+             'response': response,
+             'conversation_id': conversation_id
+         }
+
+         if audio_io:
+             audio_base64 = base64.b64encode(audio_io.getvalue()).decode('utf-8')
+             result['voice_response'] = audio_base64
+
+         return jsonify(result)
+
+     except Exception as e:
+         return jsonify({'error': str(e)}), 500
+
+ @app.route('/api/voice', methods=['POST'])
+ def handle_voice():
+     try:
+         if 'audio' not in request.files:
+             return jsonify({'error': 'No audio file provided'}), 400
+
+         audio_file = request.files['audio']
+         conversation_id = request.form.get('conversation_id', str(uuid.uuid4()))
+
+         # Save the audio file temporarily with a .wav extension
+         with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
+             audio_file.save(temp_audio.name)
+
+         # Use FFmpeg to convert the audio to the correct format
+         output_path = temp_audio.name + '_converted.wav'
+         os.system(f'ffmpeg -i {temp_audio.name} -acodec pcm_s16le -ac 1 -ar 16000 {output_path}')
+
+         try:
+             # Use the converted file for speech recognition
+             with sr.AudioFile(output_path) as source:
+                 audio = recognizer.record(source)
+                 text = recognizer.recognize_google(audio)
+
+             if not text:
+                 return jsonify({'error': 'Could not transcribe audio'}), 400
+
+             # Get response from Groq
+             response = chat_with_groq(text, conversation_id)
+
+             # Generate voice response
+             audio_io = text_to_speech(response)
+             result = {
+                 'text': text,
+                 'response': response,
+                 'conversation_id': conversation_id
+             }
+
+             if audio_io:
+                 audio_base64 = base64.b64encode(audio_io.getvalue()).decode('utf-8')
+                 result['voice_response'] = audio_base64
+
+             return jsonify(result)
+
+         finally:
+             # Clean up temporary files
+             try:
+                 os.remove(temp_audio.name)
+                 os.remove(output_path)
+             except:
+                 pass
+
+     except sr.UnknownValueError:
+         return jsonify({'error': 'Could not understand audio'}), 400
+     except sr.RequestError as e:
+         return jsonify({'error': f'Could not request results: {str(e)}'}), 400
+     except Exception as e:
+         print(f"Error in speech_to_text: {str(e)}")
+         return jsonify({'error': str(e)}), 400
+ if __name__ == '__main__':
+     app.run(host='0.0.0.0', port=7860)
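
Note: the updated /api/voice handler shells out to FFmpeg via os.system, interpolating the temp-file paths unquoted into the command string and never checking the exit status, so an unusual path or a failed conversion only surfaces later when sr.AudioFile cannot read the output. A minimal sketch of the same conversion step using subprocess.run follows; it assumes ffmpeg is available on PATH, and the helper name convert_to_wav is illustrative, not part of this commit.

import subprocess

def convert_to_wav(input_path: str) -> str:
    """Convert an uploaded audio file to 16 kHz mono PCM WAV for SpeechRecognition.

    Hypothetical helper, not part of this commit; assumes ffmpeg is on PATH.
    """
    output_path = input_path + '_converted.wav'
    # Passing arguments as a list sidesteps shell quoting of temp paths;
    # check=True raises CalledProcessError if the conversion fails instead of
    # silently producing no output file.
    subprocess.run(
        ['ffmpeg', '-y', '-i', input_path,
         '-acodec', 'pcm_s16le', '-ac', '1', '-ar', '16000',
         output_path],
        check=True,
        capture_output=True,
    )
    return output_path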
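
For a quick local check of both endpoints after this change, a small client sketch (assuming the app is running on http://localhost:7860 as configured in app.run; sample.wav is a placeholder recording, not a file in this repository):

import requests

BASE_URL = 'http://localhost:7860'  # assumed local run of app.py

# /api/chat expects JSON with 'message' and an optional 'conversation_id'
chat_resp = requests.post(f'{BASE_URL}/api/chat', json={'message': 'Hello!'})
chat_data = chat_resp.json()
print(chat_data['response'])

# /api/voice expects a multipart upload under the 'audio' field
with open('sample.wav', 'rb') as f:  # placeholder file for illustration
    voice_resp = requests.post(
        f'{BASE_URL}/api/voice',
        files={'audio': f},
        data={'conversation_id': chat_data['conversation_id']},
    )
print(voice_resp.json().get('text'))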