arjunanand13 committed
Commit 7ca0a26 · verified · 1 Parent(s): 873293f

Update app.py

Files changed (1)
  1. app.py +397 -263
app.py CHANGED
@@ -1,128 +1,31 @@
  import gradio as gr
- import requests
  from transformers import pipeline
  import edge_tts
- import tempfile
- import asyncio
- import os
- import json
- import time
- import logging
-
- # Set up logging
  logging.basicConfig(level=logging.INFO)
  logger = logging.getLogger(__name__)

- ENDPOINT_URL = "https://xzup8268xrmmxcma.us-east-1.aws.endpoints.huggingface.cloud/invocations"
- hf_token = os.getenv("HF_TOKEN")
-
- print(f"DEBUG: Starting application at {time.strftime('%Y-%m-%d %H:%M:%S')}")
- print(f"DEBUG: HF_TOKEN available: {bool(hf_token)}")
- print(f"DEBUG: Endpoint URL: {ENDPOINT_URL}")
-
- try:
-     print("DEBUG: Loading ASR pipeline...")
-     start_time = time.time()
-     asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
-     print(f"DEBUG: ASR pipeline loaded in {time.time() - start_time:.2f} seconds")
- except Exception as e:
-     print(f"DEBUG: Error loading ASR pipeline: {e}")
-     asr = None
-
- INITIAL_MESSAGE = "Hi! I'm your music buddy—tell me about your mood and the type of tunes you're in the mood for today!"
-
- def speech_to_text(speech):
-     print(f"DEBUG: speech_to_text called with input: {speech is not None}")
-     if speech is None:
-         print("DEBUG: No speech input provided")
          return ""
-
-     try:
-         start_time = time.time()
-         print("DEBUG: Starting speech recognition...")
-         result = asr(speech)["text"]
-         print(f"DEBUG: Speech recognition completed in {time.time() - start_time:.2f} seconds")
-         print(f"DEBUG: Recognized text: '{result}'")
-         return result
-     except Exception as e:
-         print(f"DEBUG: Error in speech_to_text: {e}")
-         return ""
-
- def classify_mood(input_string):
-     print(f"DEBUG: classify_mood called with: '{input_string}'")
-     input_string = input_string.lower()
-     mood_words = {"happy", "sad", "instrumental", "party"}
-     for word in mood_words:
-         if word in input_string:
-             print(f"DEBUG: Mood classified as: {word}")
-             return word, True
-     print("DEBUG: No mood classified")
-     return None, False
-
- def generate(prompt, history, temperature=0.1, max_new_tokens=2048):
-     print(f"DEBUG: generate() called at {time.strftime('%H:%M:%S')}")
-     print(f"DEBUG: Prompt length: {len(prompt)}")
-     print(f"DEBUG: History length: {len(history)}")
-
-     if not hf_token:
-         error_msg = "Error: Hugging Face authentication required. Please set your HF_TOKEN."
-         print(f"DEBUG: {error_msg}")
-         return error_msg
-
-     try:
-         print("DEBUG: Formatting prompt...")
-         start_time = time.time()
-         formatted_prompt = format_prompt(prompt, history)
-         print(f"DEBUG: Prompt formatted in {time.time() - start_time:.2f} seconds")
-         print(f"DEBUG: Formatted prompt length: {len(formatted_prompt)}")
-
-         headers = {"Authorization": f"Bearer {hf_token}", "Content-Type": "application/json"}
-         payload = {
-             "inputs": formatted_prompt,
-             "parameters": {
-                 "temperature": temperature,
-                 "max_new_tokens": max_new_tokens
-             }
-         }
-
-         print("DEBUG: Making API request...")
-         api_start_time = time.time()
-         response = requests.post(ENDPOINT_URL, headers=headers, json=payload, timeout=60)
-         api_duration = time.time() - api_start_time
-         print(f"DEBUG: API request completed in {api_duration:.2f} seconds")
-         print(f"DEBUG: Response status code: {response.status_code}")
-
-         if response.status_code == 200:
-             print("DEBUG: Parsing API response...")
-             result = response.json()
-             output = result[0]["generated_text"]
-
-             print(f"DEBUG: Generated output: '{output[:100]}...'")
-
-             mood, is_classified = classify_mood(output)
-             if is_classified:
-                 playlist_message = f"Playing {mood.capitalize()} playlist for you!"
-                 print(f"DEBUG: Returning playlist message: {playlist_message}")
-                 return playlist_message
-
-             print(f"DEBUG: Returning generated output")
-             return output
-         else:
-             error_msg = f"Error: {response.status_code} - {response.text}"
-             print(f"DEBUG: API error: {error_msg}")
-             return error_msg
-
-     except requests.exceptions.Timeout:
-         error_msg = "Error: API request timed out after 60 seconds"
-         print(f"DEBUG: {error_msg}")
-         return error_msg
-     except Exception as e:
-         error_msg = f"Error generating response: {str(e)}"
-         print(f"DEBUG: Exception in generate(): {error_msg}")
-         return error_msg


  def format_prompt(message, history):
-     print("DEBUG: format_prompt called")
      fixed_prompt = """
  You are a smart mood analyzer tasked with determining the user's mood for a music recommendation system. Your goal is to classify the user's mood into one of four categories: Happy, Sad, Instrumental, or Party.
  Instructions:
@@ -137,172 +40,403 @@ def format_prompt(message, history):
  Remember: Your primary goal is mood classification. Stay on topic and guide the conversation towards understanding the user's emotional state.
  """
      prompt = f"{fixed_prompt}\n"
-
-     for i, (user_prompt, bot_response) in enumerate(history):
-         prompt += f"User: {user_prompt}\nAssistant: {bot_response}\n"
          if i == 3:
              prompt += "Note: This is the last exchange. Classify the mood if possible or respond with 'Unclear'.\n"
-
      prompt += f"User: {message}\nAssistant:"
-     print(f"DEBUG: Final prompt length: {len(prompt)}")
      return prompt

- async def text_to_speech(text):
-     print(f"DEBUG: text_to_speech called with text length: {len(text)}")
-     try:
-         start_time = time.time()
-         print("DEBUG: Creating TTS communicate object...")
-         communicate = edge_tts.Communicate(text)
-
-         print("DEBUG: Creating temporary file...")
-         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
-             tmp_path = tmp_file.name
-             print(f"DEBUG: Saving TTS to: {tmp_path}")
-             await communicate.save(tmp_path)
-
-         duration = time.time() - start_time
-         print(f"DEBUG: TTS completed in {duration:.2f} seconds")
-         print(f"DEBUG: TTS file size: {os.path.getsize(tmp_path) if os.path.exists(tmp_path) else 'File not found'}")
-         return tmp_path
-     except Exception as e:
-         print(f"DEBUG: TTS Error: {e}")
-         return None
-
- def process_input(input_text, history):
-     print(f"DEBUG: process_input called with text: '{input_text[:50]}...'")
-     if not input_text:
-         print("DEBUG: No input text provided")
-         return history, history, ""
-
-     print("DEBUG: Calling generate function...")
-     start_time = time.time()
-     response = generate(input_text, history)
-     duration = time.time() - start_time
-     print(f"DEBUG: generate() completed in {duration:.2f} seconds")
-     print(f"DEBUG: Response: '{response[:100]}...'")
-
-     history.append((input_text, response))
-     print(f"DEBUG: Updated history length: {len(history)}")
-     return history, history, ""
-
- async def generate_audio(history):
-     print(f"DEBUG: generate_audio called with history length: {len(history)}")
-     if history and len(history) > 0:
-         last_response = history[-1][1]
-         print(f"DEBUG: Generating audio for: '{last_response[:50]}...'")
-         start_time = time.time()
-         audio_path = await text_to_speech(last_response)
-         duration = time.time() - start_time
-         print(f"DEBUG: Audio generation completed in {duration:.2f} seconds")
-         return audio_path
-     print("DEBUG: No history available for audio generation")
-     return None
-
- async def init_chat():
-     print("DEBUG: init_chat called")
-     try:
-         history = [("", INITIAL_MESSAGE)]
-         print("DEBUG: Generating initial audio...")
-         start_time = time.time()
-         audio_path = await text_to_speech(INITIAL_MESSAGE)
-         duration = time.time() - start_time
-         print(f"DEBUG: Initial audio generated in {duration:.2f} seconds")
-         print("DEBUG: init_chat completed successfully")
-         return history, history, audio_path
-     except Exception as e:
-         print(f"DEBUG: Error in init_chat: {e}")
-         return [("", INITIAL_MESSAGE)], [("", INITIAL_MESSAGE)], None
-
- def handle_voice_upload(audio_file):
-     print(f"DEBUG: handle_voice_upload called with file: {audio_file}")
-     if audio_file is None:
-         print("DEBUG: No audio file provided")
-         return ""
-
-     try:
-         start_time = time.time()
-         result = speech_to_text(audio_file)
-         duration = time.time() - start_time
-         print(f"DEBUG: Voice upload processing completed in {duration:.2f} seconds")
-         return result
-     except Exception as e:
-         print(f"DEBUG: Error in handle_voice_upload: {e}")
-         return ""
-
- print("DEBUG: Creating Gradio interface...")
-
- with gr.Blocks() as demo:
-     gr.Markdown("# Mood-Based Music Recommender with Continuous Voice Chat")
-
-     chatbot = gr.Chatbot()
-
-     with gr.Row():
-         msg = gr.Textbox(
-             placeholder="Type your message here...",
-             label="Text Input",
-             scale=4
-         )
-         submit = gr.Button("Send", scale=1)
-
-     with gr.Row():
-         voice_input = gr.Audio(
-             label="🎤 Record your voice or upload audio file",
-             sources=["microphone", "upload"],
-             type="filepath"
-         )
-
-     audio_output = gr.Audio(label="AI Response", autoplay=True)
-
-     state = gr.State([])
-
-     print("DEBUG: Setting up Gradio event handlers...")
-
-     demo.load(init_chat, outputs=[state, chatbot, audio_output])
-
-     def submit_and_generate_audio(input_text, history):
-         print(f"DEBUG: submit_and_generate_audio called at {time.strftime('%H:%M:%S')}")
-         start_time = time.time()
-         new_state, new_chatbot, empty_msg = process_input(input_text, history)
-         duration = time.time() - start_time
-         print(f"DEBUG: submit_and_generate_audio completed in {duration:.2f} seconds")
-         return new_state, new_chatbot, empty_msg
-
-     msg.submit(
-         submit_and_generate_audio,
-         inputs=[msg, state],
-         outputs=[state, chatbot, msg]
-     ).then(
-         generate_audio,
-         inputs=[state],
-         outputs=[audio_output]
-     )
-
-     submit.click(
-         submit_and_generate_audio,
-         inputs=[msg, state],
-         outputs=[state, chatbot, msg]
-     ).then(
-         generate_audio,
-         inputs=[state],
-         outputs=[audio_output]
-     )
-
-     voice_input.upload(
-         handle_voice_upload,
-         inputs=[voice_input],
-         outputs=[msg]
-     ).then(
-         submit_and_generate_audio,
-         inputs=[msg, state],
-         outputs=[state, chatbot, msg]
-     ).then(
-         generate_audio,
-         inputs=[state],
-         outputs=[audio_output]
-     )
-
-     print("DEBUG: Gradio interface created successfully")
-
- if __name__ == "__main__":
-     print("DEBUG: Launching Gradio app...")
-     demo.launch(share=True, debug=True)
+ import os, time, requests, tempfile, asyncio, logging
  import gradio as gr
  from transformers import pipeline
  import edge_tts
+ from collections import Counter
+
+ # ─── Configuration ──────────────────────────────────────────────────────────────
+ ENDPOINT_URL = "https://xzup8268xrmmxcma.us-east-1.aws.endpoints.huggingface.cloud/invocations"
+ HF_TOKEN = os.getenv("HF_TOKEN")
+
  logging.basicConfig(level=logging.INFO)
  logger = logging.getLogger(__name__)

+ # ─── Helpers ───────────────────────────────────────────────────────────────────
+ # 1) Speech→Text
+ asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
+ def speech_to_text(audio):
+     if not audio:
          return ""
+     # Gradio supplies a tuple (sr, int16 ndarray) when type="numpy"
+     if isinstance(audio, tuple):
+         sr, arr = audio
+         if arr.ndim > 1:
+             arr = arr.mean(axis=1)  # fold stereo to mono
+         arr = arr.astype("float32") / 32768.0  # the pipeline expects float32 PCM
+         return asr({"sampling_rate": sr, "raw": arr})["text"]
+     # filepath
+     return asr(audio)["text"]

+ # 2) Prompt formatting
  def format_prompt(message, history):
      fixed_prompt = """
  You are a smart mood analyzer tasked with determining the user's mood for a music recommendation system. Your goal is to classify the user's mood into one of four categories: Happy, Sad, Instrumental, or Party.
  Instructions:
  Remember: Your primary goal is mood classification. Stay on topic and guide the conversation towards understanding the user's emotional state.
  """
      prompt = f"{fixed_prompt}\n"
+     for i, (u, b) in enumerate(history):
+         prompt += f"User: {u}\nAssistant: {b}\n"
          if i == 3:
              prompt += "Note: This is the last exchange. Classify the mood if possible or respond with 'Unclear'.\n"
      prompt += f"User: {message}\nAssistant:"
      return prompt

+ # 3) Call HF Invocation Endpoint
+ def query_model(prompt, max_new_tokens=64, temperature=0.1):
+     if not HF_TOKEN:
+         # fail fast instead of sending "Bearer None"
+         raise RuntimeError("HF_TOKEN environment variable is not set")
+     headers = {
+         "Authorization": f"Bearer {HF_TOKEN}",
+         "Content-Type": "application/json",
+     }
+     payload = {
+         "inputs": prompt,
+         "parameters": {"max_new_tokens": max_new_tokens, "temperature": temperature},
+     }
+     resp = requests.post(ENDPOINT_URL, headers=headers, json=payload, timeout=30)
+     resp.raise_for_status()
+     return resp.json()[0]["generated_text"]
+
+ # 4) Aggregate mood from history
+ def aggregate_mood_from_history(history):
+     mood_words = {"happy", "sad", "instrumental", "party"}
+     counts = Counter()
+     for _, bot_response in history:
+         for tok in bot_response.split():
+             w = tok.strip('.,?!;"\'').lower()
+             if w in mood_words:
+                 counts[w] += 1
+     if not counts:
+         return None
+     return counts.most_common(1)[0][0]
+
+ # 5) Text→Speech
+ async def text_to_speech(text):
+     communicate = edge_tts.Communicate(text)
+     # edge-tts writes MP3 by default; await the save so it works inside the
+     # already-running event loop of an async Gradio callback
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
+         tmp_path = tmp.name
+     await communicate.save(tmp_path)
+     return tmp_path
+
+ # ─── Gradio Callbacks ───────────────────────────────────────────────────────────
+ def user_turn(user_input, history):
+     if not user_input:
+         return history, history, ""
+     # format the prompt from past exchanges before appending this turn,
+     # so the pending turn is not rendered as "Assistant: None"
+     formatted = format_prompt(user_input, history)
+     raw = query_model(formatted)
+     history = history + [(user_input, raw)]
+     # aggregate mood
+     mood = aggregate_mood_from_history(history)
+     if mood:
+         reply = f"Playing {mood.capitalize()} playlist for you!"
+     else:
+         reply = raw
+     history[-1] = (user_input, reply)
+     return history, history, ""
+
+ async def bot_audio(history):
+     last = history[-1][1]
+     return await text_to_speech(last)
+
+ def speech_callback(audio):
+     return speech_to_text(audio)
+
+ # ─── Build the Interface ────────────────────────────────────────────────────────
+ with gr.Blocks() as demo:
+     gr.Markdown("## 🎵 Mood-Based Music Buddy")
+     chat = gr.Chatbot()
+     txt = gr.Textbox(placeholder="Type your mood...", label="Text")
+     send = gr.Button("Send")
+     mic = gr.Audio()
+     out_audio = gr.Audio(label="Response (Audio)", autoplay=True)
+     state = gr.State([])
+
+     def init():
+         greeting = "Hi! I'm your music buddy—tell me how you're feeling today."
+         return [("", greeting)], [("", greeting)], None
+     demo.load(init, outputs=[state, chat, out_audio])
+
+     txt.submit(user_turn, [txt, state], [state, chat, txt])\
+         .then(bot_audio, [state], [out_audio])
+     send.click(user_turn, [txt, state], [state, chat, txt])\
+         .then(bot_audio, [state], [out_audio])
+
+     mic.change(speech_callback, [mic], [txt])\
+         .then(user_turn, [txt, state], [state, chat, txt])\
+         .then(bot_audio, [state], [out_audio])
+
+ if __name__ == "__main__":
+     demo.launch(debug=True)
+
+ # import gradio as gr
+ # import requests
+ # from transformers import pipeline
+ # import edge_tts
+ # import tempfile
+ # import asyncio
+ # import os
+ # import json
+ # import time
+ # import logging
+
+ # # Set up logging
+ # logging.basicConfig(level=logging.INFO)
+ # logger = logging.getLogger(__name__)
+
+ # ENDPOINT_URL = "https://xzup8268xrmmxcma.us-east-1.aws.endpoints.huggingface.cloud/invocations"
+ # hf_token = os.getenv("HF_TOKEN")
+
+ # print(f"DEBUG: Starting application at {time.strftime('%Y-%m-%d %H:%M:%S')}")
+ # print(f"DEBUG: HF_TOKEN available: {bool(hf_token)}")
+ # print(f"DEBUG: Endpoint URL: {ENDPOINT_URL}")
+
+ # try:
+ #     print("DEBUG: Loading ASR pipeline...")
+ #     start_time = time.time()
+ #     asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
+ #     print(f"DEBUG: ASR pipeline loaded in {time.time() - start_time:.2f} seconds")
+ # except Exception as e:
+ #     print(f"DEBUG: Error loading ASR pipeline: {e}")
+ #     asr = None
+
+ # INITIAL_MESSAGE = "Hi! I'm your music buddy—tell me about your mood and the type of tunes you're in the mood for today!"
+
+ # def speech_to_text(speech):
+ #     print(f"DEBUG: speech_to_text called with input: {speech is not None}")
+ #     if speech is None:
+ #         print("DEBUG: No speech input provided")
+ #         return ""
+
+ #     try:
+ #         start_time = time.time()
+ #         print("DEBUG: Starting speech recognition...")
+ #         result = asr(speech)["text"]
+ #         print(f"DEBUG: Speech recognition completed in {time.time() - start_time:.2f} seconds")
+ #         print(f"DEBUG: Recognized text: '{result}'")
+ #         return result
+ #     except Exception as e:
+ #         print(f"DEBUG: Error in speech_to_text: {e}")
+ #         return ""
+
+ # def classify_mood(input_string):
+ #     print(f"DEBUG: classify_mood called with: '{input_string}'")
+ #     input_string = input_string.lower()
+ #     mood_words = {"happy", "sad", "instrumental", "party"}
+ #     for word in mood_words:
+ #         if word in input_string:
+ #             print(f"DEBUG: Mood classified as: {word}")
+ #             return word, True
+ #     print("DEBUG: No mood classified")
+ #     return None, False
+
+ # def generate(prompt, history, temperature=0.1, max_new_tokens=2048):
+ #     print(f"DEBUG: generate() called at {time.strftime('%H:%M:%S')}")
+ #     print(f"DEBUG: Prompt length: {len(prompt)}")
+ #     print(f"DEBUG: History length: {len(history)}")
+
+ #     if not hf_token:
+ #         error_msg = "Error: Hugging Face authentication required. Please set your HF_TOKEN."
+ #         print(f"DEBUG: {error_msg}")
+ #         return error_msg
+
+ #     try:
+ #         print("DEBUG: Formatting prompt...")
+ #         start_time = time.time()
+ #         formatted_prompt = format_prompt(prompt, history)
+ #         print(f"DEBUG: Prompt formatted in {time.time() - start_time:.2f} seconds")
+ #         print(f"DEBUG: Formatted prompt length: {len(formatted_prompt)}")
+
+ #         headers = {"Authorization": f"Bearer {hf_token}", "Content-Type": "application/json"}
+ #         payload = {
+ #             "inputs": formatted_prompt,
+ #             "parameters": {
+ #                 "temperature": temperature,
+ #                 "max_new_tokens": max_new_tokens
+ #             }
+ #         }
+
+ #         print("DEBUG: Making API request...")
+ #         api_start_time = time.time()
+ #         response = requests.post(ENDPOINT_URL, headers=headers, json=payload, timeout=60)
+ #         api_duration = time.time() - api_start_time
+ #         print(f"DEBUG: API request completed in {api_duration:.2f} seconds")
+ #         print(f"DEBUG: Response status code: {response.status_code}")
+
+ #         if response.status_code == 200:
+ #             print("DEBUG: Parsing API response...")
+ #             result = response.json()
+ #             output = result[0]["generated_text"]
+
+ #             print(f"DEBUG: Generated output: '{output[:100]}...'")
+
+ #             mood, is_classified = classify_mood(output)
+ #             if is_classified:
+ #                 playlist_message = f"Playing {mood.capitalize()} playlist for you!"
+ #                 print(f"DEBUG: Returning playlist message: {playlist_message}")
+ #                 return playlist_message
+
+ #             print(f"DEBUG: Returning generated output")
+ #             return output
+ #         else:
+ #             error_msg = f"Error: {response.status_code} - {response.text}"
+ #             print(f"DEBUG: API error: {error_msg}")
+ #             return error_msg
+
+ #     except requests.exceptions.Timeout:
+ #         error_msg = "Error: API request timed out after 60 seconds"
+ #         print(f"DEBUG: {error_msg}")
+ #         return error_msg
+ #     except Exception as e:
+ #         error_msg = f"Error generating response: {str(e)}"
+ #         print(f"DEBUG: Exception in generate(): {error_msg}")
+ #         return error_msg

+ # def format_prompt(message, history):
+ #     print("DEBUG: format_prompt called")
+ #     fixed_prompt = """
+ # You are a smart mood analyzer tasked with determining the user's mood for a music recommendation system. Your goal is to classify the user's mood into one of four categories: Happy, Sad, Instrumental, or Party.
+ # Instructions:
+ # 1. Engage in a conversation with the user to understand their mood.
+ # 2. Ask relevant questions to guide the conversation towards mood classification.
+ # 3. If the user's mood is clear, respond with a single word: "Happy", "Sad", "Instrumental", or "Party".
+ # 4. If the mood is unclear, continue the conversation with a follow-up question.
+ # 5. Limit the conversation to a maximum of 5 exchanges.
+ # 6. Do not classify the mood prematurely if it's not evident from the user's responses.
+ # 7. Focus on the user's emotional state rather than specific activities or preferences.
+ # 8. If unable to classify after 5 exchanges, respond with "Unclear" to indicate the need for more information.
+ # Remember: Your primary goal is mood classification. Stay on topic and guide the conversation towards understanding the user's emotional state.
+ # """
+ #     prompt = f"{fixed_prompt}\n"

+ #     for i, (user_prompt, bot_response) in enumerate(history):
+ #         prompt += f"User: {user_prompt}\nAssistant: {bot_response}\n"
+ #         if i == 3:
+ #             prompt += "Note: This is the last exchange. Classify the mood if possible or respond with 'Unclear'.\n"

+ #     prompt += f"User: {message}\nAssistant:"
+ #     print(f"DEBUG: Final prompt length: {len(prompt)}")
+ #     return prompt

+ # async def text_to_speech(text):
+ #     print(f"DEBUG: text_to_speech called with text length: {len(text)}")
+ #     try:
+ #         start_time = time.time()
+ #         print("DEBUG: Creating TTS communicate object...")
+ #         communicate = edge_tts.Communicate(text)
+
+ #         print("DEBUG: Creating temporary file...")
+ #         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
+ #             tmp_path = tmp_file.name
+ #             print(f"DEBUG: Saving TTS to: {tmp_path}")
+ #             await communicate.save(tmp_path)
+
+ #         duration = time.time() - start_time
+ #         print(f"DEBUG: TTS completed in {duration:.2f} seconds")
+ #         print(f"DEBUG: TTS file size: {os.path.getsize(tmp_path) if os.path.exists(tmp_path) else 'File not found'}")
+ #         return tmp_path
+ #     except Exception as e:
+ #         print(f"DEBUG: TTS Error: {e}")
+ #         return None

+ # def process_input(input_text, history):
+ #     print(f"DEBUG: process_input called with text: '{input_text[:50]}...'")
+ #     if not input_text:
+ #         print("DEBUG: No input text provided")
+ #         return history, history, ""

+ #     print("DEBUG: Calling generate function...")
+ #     start_time = time.time()
+ #     response = generate(input_text, history)
+ #     duration = time.time() - start_time
+ #     print(f"DEBUG: generate() completed in {duration:.2f} seconds")
+ #     print(f"DEBUG: Response: '{response[:100]}...'")
+
+ #     history.append((input_text, response))
+ #     print(f"DEBUG: Updated history length: {len(history)}")
+ #     return history, history, ""

+ # async def generate_audio(history):
+ #     print(f"DEBUG: generate_audio called with history length: {len(history)}")
+ #     if history and len(history) > 0:
+ #         last_response = history[-1][1]
+ #         print(f"DEBUG: Generating audio for: '{last_response[:50]}...'")
+ #         start_time = time.time()
+ #         audio_path = await text_to_speech(last_response)
+ #         duration = time.time() - start_time
+ #         print(f"DEBUG: Audio generation completed in {duration:.2f} seconds")
+ #         return audio_path
+ #     print("DEBUG: No history available for audio generation")
+ #     return None

+ # async def init_chat():
+ #     print("DEBUG: init_chat called")
+ #     try:
+ #         history = [("", INITIAL_MESSAGE)]
+ #         print("DEBUG: Generating initial audio...")
+ #         start_time = time.time()
+ #         audio_path = await text_to_speech(INITIAL_MESSAGE)
+ #         duration = time.time() - start_time
+ #         print(f"DEBUG: Initial audio generated in {duration:.2f} seconds")
+ #         print("DEBUG: init_chat completed successfully")
+ #         return history, history, audio_path
+ #     except Exception as e:
+ #         print(f"DEBUG: Error in init_chat: {e}")
+ #         return [("", INITIAL_MESSAGE)], [("", INITIAL_MESSAGE)], None
+
+ # def handle_voice_upload(audio_file):
+ #     print(f"DEBUG: handle_voice_upload called with file: {audio_file}")
+ #     if audio_file is None:
+ #         print("DEBUG: No audio file provided")
+ #         return ""

+ #     try:
+ #         start_time = time.time()
+ #         result = speech_to_text(audio_file)
+ #         duration = time.time() - start_time
+ #         print(f"DEBUG: Voice upload processing completed in {duration:.2f} seconds")
+ #         return result
+ #     except Exception as e:
+ #         print(f"DEBUG: Error in handle_voice_upload: {e}")
+ #         return ""

+ # print("DEBUG: Creating Gradio interface...")

+ # with gr.Blocks() as demo:
+ #     gr.Markdown("# Mood-Based Music Recommender with Continuous Voice Chat")

+ #     chatbot = gr.Chatbot()

+ #     with gr.Row():
+ #         msg = gr.Textbox(
+ #             placeholder="Type your message here...",
+ #             label="Text Input",
+ #             scale=4
+ #         )
+ #         submit = gr.Button("Send", scale=1)

+ #     with gr.Row():
+ #         voice_input = gr.Audio(
+ #             label="🎤 Record your voice or upload audio file",
+ #             sources=["microphone", "upload"],
+ #             type="filepath"
+ #         )

+ #     audio_output = gr.Audio(label="AI Response", autoplay=True)

+ #     state = gr.State([])
+
+ #     print("DEBUG: Setting up Gradio event handlers...")
+
+ #     demo.load(init_chat, outputs=[state, chatbot, audio_output])

+ #     def submit_and_generate_audio(input_text, history):
+ #         print(f"DEBUG: submit_and_generate_audio called at {time.strftime('%H:%M:%S')}")
+ #         start_time = time.time()
+ #         new_state, new_chatbot, empty_msg = process_input(input_text, history)
+ #         duration = time.time() - start_time
+ #         print(f"DEBUG: submit_and_generate_audio completed in {duration:.2f} seconds")
+ #         return new_state, new_chatbot, empty_msg
+
+ #     msg.submit(
+ #         submit_and_generate_audio,
+ #         inputs=[msg, state],
+ #         outputs=[state, chatbot, msg]
+ #     ).then(
+ #         generate_audio,
+ #         inputs=[state],
+ #         outputs=[audio_output]
+ #     )

+ #     submit.click(
+ #         submit_and_generate_audio,
+ #         inputs=[msg, state],
+ #         outputs=[state, chatbot, msg]
+ #     ).then(
+ #         generate_audio,
+ #         inputs=[state],
+ #         outputs=[audio_output]
+ #     )

+ #     voice_input.upload(
+ #         handle_voice_upload,
+ #         inputs=[voice_input],
+ #         outputs=[msg]
+ #     ).then(
+ #         submit_and_generate_audio,
+ #         inputs=[msg, state],
+ #         outputs=[state, chatbot, msg]
+ #     ).then(
+ #         generate_audio,
+ #         inputs=[state],
+ #         outputs=[audio_output]
+ #     )
+
+ #     print("DEBUG: Gradio interface created successfully")
+
+ # if __name__ == "__main__":
+ #     print("DEBUG: Launching Gradio app...")
+ #     demo.launch(share=True, debug=True)
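
Note: the new aggregate_mood_from_history helper is pure Python, so it can be sanity-checked without the endpoint, the ASR model, or the Gradio UI. A minimal sketch follows; the helper is copied inline so the check runs standalone, and the sample conversation is invented for illustration:

from collections import Counter

# copied from app.py above so this snippet runs without importing gradio
def aggregate_mood_from_history(history):
    mood_words = {"happy", "sad", "instrumental", "party"}
    counts = Counter()
    for _, bot_response in history:
        for tok in bot_response.split():
            w = tok.strip('.,?!;"\'').lower()
            if w in mood_words:
                counts[w] += 1
    if not counts:
        return None
    return counts.most_common(1)[0][0]

# invented sample conversation: "happy" appears twice in bot replies
history = [
    ("", "Hi! I'm your music buddy, tell me how you're feeling today."),
    ("I had a great day!", "That sounds Happy! Want something upbeat?"),
    ("Yes, very upbeat.", "Happy"),
]
assert aggregate_mood_from_history(history) == "happy"
assert aggregate_mood_from_history([("", "How are you?")]) is None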