syedmudassir16 committed on
Commit
09d6c8d
·
verified ·
1 Parent(s): e0e2265
Files changed (1) hide show
  1. app.py +61 -96
app.py CHANGED
@@ -1,70 +1,10 @@
1
  from huggingface_hub import InferenceClient
2
- from transformers import pipeline
3
  import gradio as gr
4
- import edge_tts
5
- import tempfile
6
- import os
7
- from streaming_stt_nemo import Model
8
- import torch
9
- import random
10
-
11
# Hugging Face InferenceClient bound to the Mistral-7B-Instruct-v0.1 model id.
# No token is passed explicitly here; generate() streams text through this client.
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
# ASR pipeline (wav2vec2-base-960h checkpoint) used by speech_to_text().
asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
15
-
16
def speech_to_text(speech):
    """Transcribe *speech* with the module-level ``asr`` pipeline.

    Args:
        speech: Audio input handed straight to ``asr``.

    Returns:
        The value stored under the ``"text"`` key of the pipeline result.
    """
    transcription = asr(speech)
    return transcription["text"]
20
-
21
-
22
def classify_mood(input_string):
    """Detect which supported mood keyword appears in *input_string*.

    Matching is case-insensitive and substring-based. When several mood
    keywords are present, the one occurring earliest in the text wins, so the
    result is identical on every run (iterating a ``set`` of strings yields an
    order that may change between interpreter runs).

    Args:
        input_string: Text to scan for a mood keyword.

    Returns:
        ``(mood, True)`` with the lower-case keyword when one is found,
        otherwise ``(None, False)``.
    """
    text = input_string.lower()
    mood_words = ("happy", "sad", "instrumental", "party")
    # Pair every keyword with the index of its first occurrence (-1 = absent).
    positions = [(text.find(word), word) for word in mood_words]
    found = [entry for entry in positions if entry[0] >= 0]
    if not found:
        return None, False
    # min() over (index, word) tuples selects the earliest occurrence.
    return min(found)[1], True
30
 
31
def generate(
    prompt, history, temperature=0.1, max_new_tokens=2048, top_p=0.8, repetition_penalty=1.0,
):
    """Stream a model reply and convert it to a playlist message once a mood shows up.

    Args:
        prompt: Latest user message; combined with *history* by ``format_prompt``.
        history: Earlier conversation turns, forwarded to ``format_prompt``.
        temperature: Sampling temperature; values below 0.01 are raised to 0.01.
        max_new_tokens: Forwarded to ``client.text_generation``.
        top_p: Forwarded to ``client.text_generation`` after ``float()`` conversion.
        repetition_penalty: Forwarded to ``client.text_generation``.

    Returns:
        ``"Playing <Mood> playlist for you!"`` as soon as the accumulated text
        contains a mood keyword, otherwise the complete generated text.
    """
    sampling_temperature = float(temperature)
    if sampling_temperature < 1e-2:
        sampling_temperature = 1e-2

    request_options = {
        "temperature": sampling_temperature,
        "max_new_tokens": max_new_tokens,
        "top_p": float(top_p),
        "repetition_penalty": repetition_penalty,
        "do_sample": True,
        "seed": 42,
    }

    token_stream = client.text_generation(
        format_prompt(prompt, history),
        **request_options,
        stream=True,
        details=True,
        return_full_text=False,
    )

    reply = ""
    for chunk in token_stream:
        reply += chunk.token.text
        mood, is_classified = classify_mood(reply)
        if not is_classified:
            continue
        # A mood keyword appeared: stop streaming and answer with the playlist line.
        print("Chatbot:", mood.capitalize())
        return f"Playing {mood.capitalize()} playlist for you!"
    return reply
64
 
65
  def format_prompt(message, history):
66
- """Formats the prompt including fixed instructions and conversation history."""
67
- fixed_prompt = """
68
  You are a smart mood analyser, who determines user mood. Based on the user input, classify the mood of the user into one of the four moods {Happy, Sad, Instrumental, Party}. If you are finding it difficult to classify into one of these four moods, keep the conversation going on until we classify the user’s mood. Return a single-word reply from one of the options if you have classified. Suppose you classify a sentence as happy, then just respond with "happy".
69
 
70
  Note: Do not write anything else other than the classified mood if classified.
@@ -233,41 +173,66 @@ def format_prompt(message, history):
233
 
234
  User: Lets turn up the music and have some fun!
235
  LLM Response: Party
236
- """ # Include your fixed prompt and instructions here
237
- prompt = f"{fixed_prompt}"
238
  for user_prompt, bot_response in history:
239
- prompt += f"\nUser: {user_prompt}\nLLM Response: {bot_response}"
 
 
240
  prompt += f"\nUser: {message}\nLLM Response:"
 
241
  return prompt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
 
243
async def process_speech(speech_file):
    """Turn a spoken request into a spoken reply.

    The recording is transcribed with ``speech_to_text``, answered by
    ``generate`` (called with an empty history), and the answer is synthesised
    with ``edge_tts`` into a temporary ``.wav``-suffixed file.

    Args:
        speech_file: Audio input passed on to ``speech_to_text``.

    Yields:
        Path of the temporary file holding the synthesised reply. The file is
        created with ``delete=False`` and is not removed by this function.
    """
    transcript = speech_to_text(speech_file)
    reply_text = generate(transcript, history="")
    synthesizer = edge_tts.Communicate(reply_text)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as audio_file:
        tmp_path = audio_file.name
        await synthesizer.save(tmp_path)
    yield tmp_path
252
- DESCRIPTION = """ # <center><b>Mood-Based Music Recommender⚡</b></center>
253
- ### <center>Hi! I'm a music recommender app.
254
- ### <center>What kind of music do you want to listen to, or how are you feeling today?</center>
255
- """
256
# Gradio interface setup: a Blocks page with a microphone input and an
# auto-playing audio output, wired to process_speech.
# NOTE(review): the original indentation is not visible in this view; the
# nesting below (gr.Interface inside the Row) is an assumption — confirm.
with gr.Blocks(css="style.css") as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        # NOTE: the names `input` and `output` shadow the builtin `input` in this module.
        input = gr.Audio(label="User", sources="microphone", type="filepath", waveform_options=False)
        output = gr.Audio(label="AI", type="filepath",
                        interactive=False,
                        autoplay=True,
                        elem_classes="audio")
        # live=True re-runs process_speech when the input changes, without a submit button.
        gr.Interface(
            batch=True,
            max_batch_size=10,
            fn=process_speech,
            inputs=[input],
            outputs=[output], live=True)

# Only start the server when the file is executed as a script.
if __name__ == "__main__":
    demo.queue(max_size=200).launch()
 
1
  from huggingface_hub import InferenceClient
 
2
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
# Shared InferenceClient bound to the Mistral-7B-Instruct-v0.1 model id;
# generate() streams text through it.
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  def format_prompt(message, history):
7
+ fixed_prompt= """
 
8
  You are a smart mood analyser, who determines user mood. Based on the user input, classify the mood of the user into one of the four moods {Happy, Sad, Instrumental, Party}. If you are finding it difficult to classify into one of these four moods, keep the conversation going on until we classify the user’s mood. Return a single-word reply from one of the options if you have classified. Suppose you classify a sentence as happy, then just respond with "happy".
9
 
10
  Note: Do not write anything else other than the classified mood if classified.
 
173
 
174
  User: Lets turn up the music and have some fun!
175
  LLM Response: Party
176
+ """
177
+ prompt = f"<s>{fixed_prompt}"
178
  for user_prompt, bot_response in history:
179
+ prompt += f"\n User:{user_prompt}\n LLM Response:{bot_response}"
180
+
181
+ # Add the current message
182
  prompt += f"\nUser: {message}\nLLM Response:"
183
+ # breakpoint()
184
  return prompt
185
def classify_mood(input_string):
    """Detect which supported mood keyword appears in *input_string*.

    Matching is case-insensitive and substring-based. When several mood
    keywords are present, the one occurring earliest in the text wins, so the
    result is identical on every run (iterating a ``set`` of strings yields an
    order that may change between interpreter runs).

    Args:
        input_string: Text to scan for a mood keyword.

    Returns:
        ``(mood, True)`` with the lower-case keyword when one is found,
        otherwise ``(None, False)``.
    """
    text = input_string.lower()
    mood_words = ("happy", "sad", "instrumental", "party")
    # Pair every keyword with the index of its first occurrence (-1 = absent).
    positions = [(text.find(word), word) for word in mood_words]
    found = [entry for entry in positions if entry[0] >= 0]
    if not found:
        return None, False
    # min() over (index, word) tuples selects the earliest occurrence.
    return min(found)[1], True
192
+
193
def generate(
    prompt, history, temperature=0.1, max_new_tokens=2048, top_p=0.8, repetition_penalty=1.0,
):
    """Stream a model reply and convert it to a playlist message once a mood shows up.

    Args:
        prompt: Latest user message; combined with *history* by ``format_prompt``.
        history: Earlier conversation turns, forwarded to ``format_prompt``.
        temperature: Sampling temperature; values below 0.01 are raised to 0.01.
        max_new_tokens: Forwarded to ``client.text_generation``.
        top_p: Forwarded to ``client.text_generation`` after ``float()`` conversion.
        repetition_penalty: Forwarded to ``client.text_generation``.

    Returns:
        ``"Playing <Mood> playlist for you!"`` as soon as the accumulated text
        contains a mood keyword, otherwise the complete generated text.
    """
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )

    formatted_prompt = format_prompt(prompt, history)

    stream = client.text_generation(
        formatted_prompt,
        **generate_kwargs,
        stream=True,
        details=True,
        return_full_text=False,
    )
    output = ""

    for response in stream:
        output += response.token.text
        # Re-check the accumulated text after every token so the reply can be
        # cut short as soon as a mood keyword is present.
        mood, is_classified = classify_mood(output)
        if is_classified:
            print("Chatbot:", mood.capitalize())
            return f"Playing {mood.capitalize()} playlist for you!"
    return output
227
+
228
+
229
+
230
# Chat UI: each user message, together with the chat history, is handled by
# generate(). The retry, undo and clear buttons are disabled.
demo = gr.ChatInterface(
    fn=generate,
    title="Mood-Based Music Recommender",
    description="<span style='font-size: larger; font-weight: bold;'>Hi! I'm a music recommender app. What kind of music do you want to listen to, or how are you feeling today?</span>",
    retry_btn=None,
    undo_btn=None,
    clear_btn=None,
)

# Enable request queuing, then start the server.
demo.queue()
demo.launch()