anshharora committed on
Commit
62acf57
·
verified ·
1 Parent(s): 57227ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +122 -15
app.py CHANGED
@@ -1,26 +1,35 @@
1
- # app.py
2
- from flask import Flask, request, jsonify, render_template, send_file
3
  from dotenv import load_dotenv
4
  from groq import Groq
5
  import os
6
  import uuid
7
- import tempfile
8
- import sounddevice as sd
9
- import numpy as np
10
  import io
11
  import base64
12
- import wave
13
  import speech_recognition as sr
14
- from gtts import gTTS
 
15
 
 
 
 
 
 
 
16
  app = Flask(__name__, static_folder='static')
 
 
17
  load_dotenv()
18
 
 
19
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
20
  client = Groq(api_key=GROQ_API_KEY)
21
  MODEL = "llama3-70b-8192"
22
 
 
23
  recognizer = sr.Recognizer()
 
 
24
  conversations = {}
25
 
26
  def load_base_prompt():
@@ -29,27 +38,34 @@ def load_base_prompt():
29
  return file.read().strip()
30
  except FileNotFoundError:
31
  print("Error: base_prompt.txt file not found.")
32
- return "You are a helpful assistant."
33
 
 
34
  base_prompt = load_base_prompt()
35
 
36
  def chat_with_groq(user_message, conversation_id=None):
37
  try:
 
38
  messages = conversations.get(conversation_id, [])
39
  if not messages:
40
  messages.append({"role": "system", "content": base_prompt})
41
 
 
42
  messages.append({"role": "user", "content": user_message})
43
 
 
44
  completion = client.chat.completions.create(
45
  model=MODEL,
46
  messages=messages,
47
  temperature=0.1,
 
48
  )
49
 
 
50
  assistant_message = completion.choices[0].message.content.strip()
51
  messages.append({"role": "assistant", "content": assistant_message})
52
 
 
53
  if conversation_id:
54
  conversations[conversation_id] = messages
55
 
@@ -69,6 +85,39 @@ def text_to_speech(text):
69
  print(f"Error in text_to_speech: {str(e)}")
70
  return None
71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  @app.route('/')
73
  def index():
74
  return render_template('index.html')
@@ -79,28 +128,86 @@ def chat():
79
  data = request.get_json()
80
  user_message = data.get('message', '')
81
  conversation_id = data.get('conversation_id', str(uuid.uuid4()))
82
- voice_output = data.get('voice_output', False)
83
 
84
  if not user_message:
85
  return jsonify({'error': 'No message provided'}), 400
86
 
 
87
  response = chat_with_groq(user_message, conversation_id)
88
 
 
 
89
  result = {
90
  'response': response,
91
  'conversation_id': conversation_id
92
  }
93
 
94
- if voice_output:
95
- audio_io = text_to_speech(response)
96
- if audio_io:
97
- audio_base64 = base64.b64encode(audio_io.getvalue()).decode('utf-8')
98
- result['voice_response'] = audio_base64
99
 
100
  return jsonify(result)
101
 
102
  except Exception as e:
103
  return jsonify({'error': str(e)}), 500
104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  if __name__ == '__main__':
106
- app.run(debug=True)
 
1
+ from flask import Flask, request, jsonify, render_template
 
2
  from dotenv import load_dotenv
3
  from groq import Groq
4
  import os
5
  import uuid
6
+ from gtts import gTTS
 
 
7
  import io
8
  import base64
 
9
  import speech_recognition as sr
10
+ import tempfile
11
+ import json
12
 
13
+ try:
14
+ import pyaudio
15
+ except ImportError:
16
+ print("Warning: PyAudio not available, speech functionality will be limited")
17
+
18
+ # Initialize Flask app
19
  app = Flask(__name__, static_folder='static')
20
+
21
+ # Load environment variables
22
  load_dotenv()
23
 
24
+ # Groq API Configuration
25
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
26
  client = Groq(api_key=GROQ_API_KEY)
27
  MODEL = "llama3-70b-8192"
28
 
29
+ # Initialize speech recognition
30
  recognizer = sr.Recognizer()
31
+
32
+ # Store conversation history
33
  conversations = {}
34
 
35
  def load_base_prompt():
 
38
  return file.read().strip()
39
  except FileNotFoundError:
40
  print("Error: base_prompt.txt file not found.")
41
+ return "You are a helpful assistant for language learning."
42
 
43
+ # Load the base prompt
44
  base_prompt = load_base_prompt()
45
 
46
  def chat_with_groq(user_message, conversation_id=None):
47
  try:
48
+ # Get conversation history or create new
49
  messages = conversations.get(conversation_id, [])
50
  if not messages:
51
  messages.append({"role": "system", "content": base_prompt})
52
 
53
+ # Add user message
54
  messages.append({"role": "user", "content": user_message})
55
 
56
+ # Get completion from Groq
57
  completion = client.chat.completions.create(
58
  model=MODEL,
59
  messages=messages,
60
  temperature=0.1,
61
+ max_tokens=1024
62
  )
63
 
64
+ # Add assistant's response to history
65
  assistant_message = completion.choices[0].message.content.strip()
66
  messages.append({"role": "assistant", "content": assistant_message})
67
 
68
+ # Update conversation history
69
  if conversation_id:
70
  conversations[conversation_id] = messages
71
 
 
85
  print(f"Error in text_to_speech: {str(e)}")
86
  return None
87
 
88
def speech_to_text(audio_file):
    """Transcribe an uploaded WAV clip to text with Google Speech Recognition.

    Args:
        audio_file: A file-like upload (e.g. a Flask ``FileStorage``) exposing
            ``save(path)``; assumed to contain WAV audio — TODO confirm callers
            only send WAV.

    Returns:
        The transcribed text on success, a human-readable error string when
        the audio could not be understood or the recognition service failed,
        or ``None`` on any other unexpected error.
    """
    # Track the temp path separately so cleanup in ``finally`` cannot hit an
    # unbound name if NamedTemporaryFile itself raises (the original code
    # referenced ``temp_audio`` there, which would raise NameError and mask
    # the real exception).
    temp_path = None
    try:
        # Persist the upload to disk, then close the handle BEFORE opening it
        # with sr.AudioFile — keeping it open fails on Windows.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
            temp_path = temp_audio.name
            audio_file.save(temp_path)

        with sr.AudioFile(temp_path) as source:
            # Let the recognizer adapt its threshold to the clip's noise floor.
            recognizer.dynamic_energy_threshold = True
            recognizer.energy_threshold = 4000

            # Record the entire clip and transcribe it in one call.
            audio = recognizer.record(source)
            return recognizer.recognize_google(audio, language='en-US')

    except sr.UnknownValueError:
        return "Could not understand audio"
    except sr.RequestError as e:
        return f"Could not request results; {str(e)}"
    except Exception as e:
        print(f"Error in speech_to_text: {str(e)}")
        return None
    finally:
        # Best-effort cleanup of the temporary file; ignore only OS errors
        # (file already gone, permissions) instead of a bare except.
        if temp_path:
            try:
                os.unlink(temp_path)
            except OSError:
                pass
120
+
121
@app.route('/')
def index():
    """Render the single-page chat UI."""
    return render_template('index.html')
 
128
  data = request.get_json()
129
  user_message = data.get('message', '')
130
  conversation_id = data.get('conversation_id', str(uuid.uuid4()))
 
131
 
132
  if not user_message:
133
  return jsonify({'error': 'No message provided'}), 400
134
 
135
+ # Get response from Groq
136
  response = chat_with_groq(user_message, conversation_id)
137
 
138
+ # Generate voice response
139
+ audio_io = text_to_speech(response)
140
  result = {
141
  'response': response,
142
  'conversation_id': conversation_id
143
  }
144
 
145
+ if audio_io:
146
+ audio_base64 = base64.b64encode(audio_io.getvalue()).decode('utf-8')
147
+ result['voice_response'] = audio_base64
 
 
148
 
149
  return jsonify(result)
150
 
151
  except Exception as e:
152
  return jsonify({'error': str(e)}), 500
153
 
154
@app.route('/api/voice', methods=['POST'])
def handle_voice():
    """Accept an uploaded audio clip, transcribe it, and return a chat reply.

    Expects multipart form data with an 'audio' file and an optional
    'conversation_id'. Responds with JSON containing the transcription
    ('text'), the assistant's reply ('response'), the conversation id, and —
    when text-to-speech succeeds — a base64-encoded audio payload
    ('voice_response').
    """
    import subprocess  # local import: only this handler shells out to ffmpeg

    # Track both paths up front so the ``finally`` cleanup runs even when the
    # ffmpeg conversion fails (the original leaked the temp file in that case).
    temp_path = None
    output_path = None
    try:
        if 'audio' not in request.files:
            return jsonify({'error': 'No audio file provided'}), 400

        audio_file = request.files['audio']
        conversation_id = request.form.get('conversation_id', str(uuid.uuid4()))

        # Persist the upload with a .wav extension; close the handle before
        # ffmpeg reads it (required on Windows).
        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
            temp_path = temp_audio.name
            audio_file.save(temp_path)

        # Convert to 16 kHz mono 16-bit PCM, which SpeechRecognition expects.
        # An argument list (instead of os.system with an interpolated string)
        # avoids shell quoting/injection problems, and check=True surfaces
        # conversion failures instead of silently continuing with no output.
        output_path = temp_path + '_converted.wav'
        subprocess.run(
            ['ffmpeg', '-y', '-i', temp_path,
             '-acodec', 'pcm_s16le', '-ac', '1', '-ar', '16000', output_path],
            check=True, capture_output=True,
        )

        # Transcribe the converted audio.
        with sr.AudioFile(output_path) as source:
            audio = recognizer.record(source)
            text = recognizer.recognize_google(audio)

        if not text:
            return jsonify({'error': 'Could not transcribe audio'}), 400

        # Get the assistant's reply, then synthesize the voice response.
        response = chat_with_groq(text, conversation_id)
        audio_io = text_to_speech(response)

        result = {
            'text': text,
            'response': response,
            'conversation_id': conversation_id
        }
        if audio_io:
            result['voice_response'] = base64.b64encode(audio_io.getvalue()).decode('utf-8')

        return jsonify(result)

    except sr.UnknownValueError:
        return jsonify({'error': 'Could not understand audio'}), 400
    except sr.RequestError as e:
        return jsonify({'error': f'Could not request results: {str(e)}'}), 400
    except Exception as e:
        # Log under the correct handler name (original said speech_to_text).
        print(f"Error in handle_voice: {str(e)}")
        return jsonify({'error': str(e)}), 400
    finally:
        # Best-effort cleanup of both temp files, even on early failure.
        for path in (temp_path, output_path):
            if path:
                try:
                    os.remove(path)
                except OSError:
                    pass
212
if __name__ == '__main__':
    # Bind to all interfaces on port 7860 — presumably the Hugging Face
    # Spaces convention; verify against the deployment config.
    app.run(host='0.0.0.0', port=7860)