anshharora committed on
Commit
dd1839a
·
verified ·
1 Parent(s): a2009a1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -122
app.py CHANGED
@@ -1,35 +1,26 @@
1
- from flask import Flask, request, jsonify, render_template
 
2
  from dotenv import load_dotenv
3
  from groq import Groq
4
  import os
5
  import uuid
6
- from gtts import gTTS
 
 
7
  import io
8
  import base64
 
9
  import speech_recognition as sr
10
- import tempfile
11
- import json
12
-
13
- try:
14
- import pyaudio
15
- except ImportError:
16
- print("Warning: PyAudio not available, speech functionality will be limited")
17
 
18
- # Initialize Flask app
19
  app = Flask(__name__, static_folder='static')
20
-
21
- # Load environment variables
22
  load_dotenv()
23
 
24
- # Groq API Configuration
25
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
26
  client = Groq(api_key=GROQ_API_KEY)
27
  MODEL = "llama3-70b-8192"
28
 
29
- # Initialize speech recognition
30
  recognizer = sr.Recognizer()
31
-
32
- # Store conversation history
33
  conversations = {}
34
 
35
  def load_base_prompt():
@@ -38,34 +29,27 @@ def load_base_prompt():
38
  return file.read().strip()
39
  except FileNotFoundError:
40
  print("Error: base_prompt.txt file not found.")
41
- return "You are a helpful assistant for language learning."
42
 
43
- # Load the base prompt
44
  base_prompt = load_base_prompt()
45
 
46
  def chat_with_groq(user_message, conversation_id=None):
47
  try:
48
- # Get conversation history or create new
49
  messages = conversations.get(conversation_id, [])
50
  if not messages:
51
  messages.append({"role": "system", "content": base_prompt})
52
 
53
- # Add user message
54
  messages.append({"role": "user", "content": user_message})
55
 
56
- # Get completion from Groq
57
  completion = client.chat.completions.create(
58
  model=MODEL,
59
  messages=messages,
60
  temperature=0.1,
61
- max_tokens=1024
62
  )
63
 
64
- # Add assistant's response to history
65
  assistant_message = completion.choices[0].message.content.strip()
66
  messages.append({"role": "assistant", "content": assistant_message})
67
 
68
- # Update conversation history
69
  if conversation_id:
70
  conversations[conversation_id] = messages
71
 
@@ -85,39 +69,6 @@ def text_to_speech(text):
85
  print(f"Error in text_to_speech: {str(e)}")
86
  return None
87
 
88
def speech_to_text(audio_file):
    """Transcribe an uploaded WAV clip to text with Google's free recognizer.

    Args:
        audio_file: upload object exposing ``save(path)`` (e.g. a werkzeug
            ``FileStorage``) whose contents are WAV audio.

    Returns:
        The transcript string on success; a human-readable error string when
        the audio was unintelligible or the recognition service failed;
        ``None`` on any other error.
    """
    temp_path = None
    try:
        # Persist the upload so SpeechRecognition can open it from disk.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
            temp_path = temp_audio.name
            audio_file.save(temp_path)

        with sr.AudioFile(temp_path) as source:
            # Let the recognizer adapt its noise threshold to this clip.
            recognizer.dynamic_energy_threshold = True
            recognizer.energy_threshold = 4000

            # Read the whole clip, then send it for recognition.
            audio = recognizer.record(source)
            return recognizer.recognize_google(audio, language='en-US')

    except sr.UnknownValueError:
        return "Could not understand audio"
    except sr.RequestError as e:
        return f"Could not request results; {str(e)}"
    except Exception as e:
        print(f"Error in speech_to_text: {str(e)}")
        return None
    finally:
        # temp_path stays None if NamedTemporaryFile itself failed — the
        # original referenced temp_audio.name here, which would NameError
        # in that case. Catch only OSError instead of a bare except.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                pass
120
-
121
@app.route('/')
def index():
    """Serve the single-page chat UI from templates/index.html."""
    return render_template('index.html')
@@ -128,86 +79,28 @@ def chat():
128
  data = request.get_json()
129
  user_message = data.get('message', '')
130
  conversation_id = data.get('conversation_id', str(uuid.uuid4()))
 
131
 
132
  if not user_message:
133
  return jsonify({'error': 'No message provided'}), 400
134
 
135
- # Get response from Groq
136
  response = chat_with_groq(user_message, conversation_id)
137
 
138
- # Generate voice response
139
- audio_io = text_to_speech(response)
140
  result = {
141
  'response': response,
142
  'conversation_id': conversation_id
143
  }
144
 
145
- if audio_io:
146
- audio_base64 = base64.b64encode(audio_io.getvalue()).decode('utf-8')
147
- result['voice_response'] = audio_base64
 
 
148
 
149
  return jsonify(result)
150
 
151
  except Exception as e:
152
  return jsonify({'error': str(e)}), 500
153
 
154
@app.route('/api/voice', methods=['POST'])
def handle_voice():
    """Accept an uploaded audio clip, transcribe it, and chat with the model.

    Expects multipart form data with an ``audio`` file and an optional
    ``conversation_id``. Returns JSON with the transcript (``text``), the
    assistant's reply (``response``), the ``conversation_id``, and — when
    speech synthesis succeeds — a base64-encoded ``voice_response``.
    Errors are returned as JSON with a 400 status.
    """
    import subprocess  # local import: only the ffmpeg conversion needs it

    temp_path = None
    output_path = None
    try:
        if 'audio' not in request.files:
            return jsonify({'error': 'No audio file provided'}), 400

        audio_file = request.files['audio']
        conversation_id = request.form.get('conversation_id', str(uuid.uuid4()))

        # Persist the upload so ffmpeg can read it from disk.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
            temp_path = temp_audio.name
            audio_file.save(temp_path)

        # Convert to 16 kHz mono 16-bit PCM WAV, the format the recognizer
        # expects. subprocess.run with an argument list avoids the shell
        # (the original os.system f-string broke on paths with spaces and
        # was injection-prone) and lets us detect conversion failure, which
        # os.system's ignored return value silently hid.
        output_path = temp_path + '_converted.wav'
        conversion = subprocess.run(
            ['ffmpeg', '-y', '-i', temp_path,
             '-acodec', 'pcm_s16le', '-ac', '1', '-ar', '16000', output_path],
            capture_output=True,
        )
        if conversion.returncode != 0:
            return jsonify({'error': 'Audio conversion failed'}), 400

        with sr.AudioFile(output_path) as source:
            audio = recognizer.record(source)
            text = recognizer.recognize_google(audio)

        if not text:
            return jsonify({'error': 'Could not transcribe audio'}), 400

        # Run the transcript through the chat model.
        response = chat_with_groq(text, conversation_id)

        result = {
            'text': text,
            'response': response,
            'conversation_id': conversation_id,
        }

        # Attach a spoken version of the reply when synthesis succeeds.
        audio_io = text_to_speech(response)
        if audio_io:
            result['voice_response'] = base64.b64encode(
                audio_io.getvalue()).decode('utf-8')

        return jsonify(result)

    except sr.UnknownValueError:
        return jsonify({'error': 'Could not understand audio'}), 400
    except sr.RequestError as e:
        return jsonify({'error': f'Could not request results: {str(e)}'}), 400
    except Exception as e:
        # Original logged "Error in speech_to_text" here — a copy-paste slip.
        print(f"Error in handle_voice: {str(e)}")
        return jsonify({'error': str(e)}), 400
    finally:
        # Best-effort cleanup of both temp files; paths stay None if the
        # failure happened before they were created.
        for path in (temp_path, output_path):
            if path is not None:
                try:
                    os.remove(path)
                except OSError:
                    pass
212
if __name__ == '__main__':
    # Bind on all interfaces; 7860 is the port convention for Hugging Face
    # Spaces (this commit page is from a Space) — presumably why it's used.
    app.run(host='0.0.0.0', port=7860)
 
1
+ # app.py
2
+ from flask import Flask, request, jsonify, render_template, send_file
3
  from dotenv import load_dotenv
4
  from groq import Groq
5
  import os
6
  import uuid
7
+ import tempfile
8
+ import sounddevice as sd
9
+ import numpy as np
10
  import io
11
  import base64
12
+ import wave
13
  import speech_recognition as sr
14
+ from gtts import gTTS
 
 
 
 
 
 
15
 
 
16
  app = Flask(__name__, static_folder='static')
 
 
17
  load_dotenv()
18
 
 
19
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
20
  client = Groq(api_key=GROQ_API_KEY)
21
  MODEL = "llama3-70b-8192"
22
 
 
23
  recognizer = sr.Recognizer()
 
 
24
  conversations = {}
25
 
26
  def load_base_prompt():
 
29
  return file.read().strip()
30
  except FileNotFoundError:
31
  print("Error: base_prompt.txt file not found.")
32
+ return "You are a helpful assistant."
33
 
 
34
  base_prompt = load_base_prompt()
35
 
36
  def chat_with_groq(user_message, conversation_id=None):
37
  try:
 
38
  messages = conversations.get(conversation_id, [])
39
  if not messages:
40
  messages.append({"role": "system", "content": base_prompt})
41
 
 
42
  messages.append({"role": "user", "content": user_message})
43
 
 
44
  completion = client.chat.completions.create(
45
  model=MODEL,
46
  messages=messages,
47
  temperature=0.1,
 
48
  )
49
 
 
50
  assistant_message = completion.choices[0].message.content.strip()
51
  messages.append({"role": "assistant", "content": assistant_message})
52
 
 
53
  if conversation_id:
54
  conversations[conversation_id] = messages
55
 
 
69
  print(f"Error in text_to_speech: {str(e)}")
70
  return None
71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
@app.route('/')
def index():
    """Serve the single-page chat UI from templates/index.html."""
    return render_template('index.html')
 
79
  data = request.get_json()
80
  user_message = data.get('message', '')
81
  conversation_id = data.get('conversation_id', str(uuid.uuid4()))
82
+ voice_output = data.get('voice_output', False)
83
 
84
  if not user_message:
85
  return jsonify({'error': 'No message provided'}), 400
86
 
 
87
  response = chat_with_groq(user_message, conversation_id)
88
 
 
 
89
  result = {
90
  'response': response,
91
  'conversation_id': conversation_id
92
  }
93
 
94
+ if voice_output:
95
+ audio_io = text_to_speech(response)
96
+ if audio_io:
97
+ audio_base64 = base64.b64encode(audio_io.getvalue()).decode('utf-8')
98
+ result['voice_response'] = audio_base64
99
 
100
  return jsonify(result)
101
 
102
  except Exception as e:
103
  return jsonify({'error': str(e)}), 500
104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
if __name__ == '__main__':
    # NOTE(review): debug=True enables Flask's interactive debugger, which
    # permits arbitrary code execution if the server is reachable by others,
    # and auto-reloads on file changes — confirm this never runs with debug
    # enabled in a deployed environment.
    app.run(debug=True)