syedmudassir16 committed on
Commit 905a08e · verified · 1 Parent(s): fb18002

dr_infer_voice_to_text
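
Replaces the hosted InferenceClient call with a locally loaded, 4-bit quantized Mistral model and adds a voice round trip: speech-to-text on input, text-to-speech on output.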

Files changed (1)
  1. app.py +102 -57
app.py CHANGED
@@ -1,10 +1,31 @@
- from huggingface_hub import InferenceClient
  import gradio as gr

- client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
- 
- def format_prompt(message, history):
-     fixed_prompt= """
  You are a smart mood analyser that determines the user's mood. Based on the user input, classify the mood of the user into one of the four moods {Happy, Sad, Instrumental, Party}. If it is difficult to classify into one of these four moods, keep the conversation going until the user's mood can be classified. Once classified, return a single-word reply from one of the options. For example, if you classify a sentence as happy, just respond with "happy".
 
  Note: Do not write anything else other than the classified mood if classified.
@@ -174,14 +195,43 @@ def format_prompt(message, history):
  User: Let's turn up the music and have some fun!
  LLM Response: Party
  """
-     prompt = f"<s>{fixed_prompt}"
-     for user_prompt, bot_response in history:
-         prompt += f"\n User:{user_prompt}\n LLM Response:{bot_response}"
- 
-     # Add the current message
-     prompt += f"\nUser: {message}\nLLM Response:"
-     # breakpoint()
-     return prompt
  def classify_mood(input_string):
      input_string = input_string.lower()
      mood_words = {"happy", "sad", "instrumental", "party"}
@@ -190,48 +240,43 @@ def classify_mood(input_string):
          return word, True
      return None, False
 
- def generate(
-     prompt, history, temperature=0.1, max_new_tokens=2048, top_p=0.8, repetition_penalty=1.0,
- ):
-     temperature = float(temperature)
-     if temperature < 1e-2:
-         temperature = 1e-2
-     top_p = float(top_p)
- 
-     generate_kwargs = dict(
-         temperature=temperature,
-         max_new_tokens=max_new_tokens,
-         top_p=top_p,
-         repetition_penalty=repetition_penalty,
-         do_sample=True,
-         seed=42,
-     )
- 
-     formatted_prompt = format_prompt(prompt, history)
- 
-     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
-     output = ""
- 
-     for response in stream:
-         output += response.token.text
-         mood, is_classified = classify_mood(output)
-         # Print the chatbot's response
          if is_classified:
-             print("Chatbot:", mood.capitalize())
-             playlist_message = f"Playing {mood.capitalize()} playlist for you!"
-             output = playlist_message
-             return output
-         # yield output
-     return output
- 
- 
- demo = gr.ChatInterface(fn=generate,
-                         title="Mood-Based Music Recommender",
-                         retry_btn=None,
-                         undo_btn=None,
-                         clear_btn=None,
-                         description="<span style='font-size: larger; font-weight: bold;'>Hi! I'm a music recommender app. What kind of music do you want to listen to, or how are you feeling today?</span>",
-                         )
- 
- demo.queue().launch()
 
+ import argparse
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
  import gradio as gr

+ class llmChatbot:
+     def __init__(self, model_name, temperature=0.3, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
+         # Specify how to quantize the model
+         quantization_config = BitsAndBytesConfig(
+             load_in_4bit=True,
+             bnb_4bit_quant_type="nf4",
+             bnb_4bit_compute_dtype="float16",  # Use the string "float16" instead of torch.float16
+         )
+ 
+         self.model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quantization_config, device_map="auto")
+         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+ 
+         # Set pad_token to eos_token if not already set
+         if self.tokenizer.pad_token is None:
+             self.tokenizer.pad_token = self.tokenizer.eos_token
+ 
+         self.temperature = temperature
+         self.max_new_tokens = max_new_tokens
+         self.top_p = top_p
+         self.repetition_penalty = repetition_penalty
+ 
+     def format_prompt(self, message, history):
+         fixed_prompt = """
  You are a smart mood analyser that determines the user's mood. Based on the user input, classify the mood of the user into one of the four moods {Happy, Sad, Instrumental, Party}. If it is difficult to classify into one of these four moods, keep the conversation going until the user's mood can be classified. Once classified, return a single-word reply from one of the options. For example, if you classify a sentence as happy, just respond with "happy".
 
  Note: Do not write anything else other than the classified mood if classified.
 
  User: Let's turn up the music and have some fun!
  LLM Response: Party
  """
+ 
+         # Start with the fixed prompt
+         prompt = f"<s>{fixed_prompt}"
+ 
+         # Append the conversation history
+         for user_prompt, bot_response in history:
+             prompt += f"\nUser: {user_prompt}\nLLM Response: {bot_response}"
+ 
+         # Add the current message
+         prompt += f"\nUser: {message}\nLLM Response:"
+ 
+         return prompt
+ 
+     def generate(self, message, history, temperature=None, max_new_tokens=None, top_p=None, repetition_penalty=None):
+         if temperature is None:
+             temperature = self.temperature
+         if max_new_tokens is None:
+             max_new_tokens = self.max_new_tokens
+         if top_p is None:
+             top_p = self.top_p
+         if repetition_penalty is None:
+             repetition_penalty = self.repetition_penalty
+ 
+         prompt = self.format_prompt(message, history)
+         inputs = self.tokenizer(prompt, return_tensors="pt", padding=True).to("cuda")
+         generate_kwargs = dict(
+             temperature=temperature,
+             max_new_tokens=max_new_tokens,
+             top_p=top_p,
+             repetition_penalty=repetition_penalty,
+             do_sample=True,
+             pad_token_id=self.tokenizer.pad_token_id,  # Explicitly set the pad_token_id
+         )
+         output_ids = self.model.generate(**inputs, **generate_kwargs)
+         # Decode only the newly generated tokens; slicing the decoded string by
+         # len(prompt) can misalign once skip_special_tokens drops the <s> marker
+         new_tokens = output_ids[0][inputs["input_ids"].shape[1]:]
+         return self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
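+ 
+ # Hypothetical usage sketch: with an empty history, a clearly party-flavoured
+ # message such as "Let's dance all night!" should come back as the single word "Party".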
+ 
  def classify_mood(input_string):
      input_string = input_string.lower()
      mood_words = {"happy", "sad", "instrumental", "party"}
 
          return word, True
      return None, False
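+ # classify_mood examples: classify_mood("Party!") -> ("party", True);
+ # classify_mood("maybe some jazz?") -> (None, False)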
 
+ def speech_to_text(speech):
+     # Building the pipeline on every call reloads the model; acceptable for a demo
+     asr = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
+     text = asr(speech)["text"]
+     return text
+ 
+ def text_to_speech(text):
+     # Assumes this checkpoint is loadable by the transformers text-to-speech pipeline
+     tts = pipeline("text-to-speech", model="facebook/fastspeech2-en-ljspeech")
+     speech = tts(text)
+     # Gradio's numpy audio output expects a (sample_rate, waveform) tuple
+     return speech["sampling_rate"], speech["audio"]
+ 
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser(description="Start the Mistral chatbot application.")
+     parser.add_argument("--model_name", type=str, default="mistralai/Mistral-7B-Instruct-v0.2", help="The name of the model to use.")
+ 
+     args = parser.parse_args()
+     model_name = args.model_name
+ 
+     # Instantiate the chatbot with necessary parameters
+     mistral_chatbot = llmChatbot(model_name=model_name)
+     history = []
+     print("How are you doing today?")
+ 
+     def chatbot_response(audio_input):
+         text_input = speech_to_text(audio_input)
+         result = mistral_chatbot.generate(text_input, history)
+         mood, is_classified = classify_mood(result)
          if is_classified:
+             response_text = mood.capitalize()
+         else:
+             response_text = result
+         audio_output = text_to_speech(response_text)
+         history.append((text_input, response_text))
+         return audio_output, response_text
+ 
+     gr.Interface(
+         fn=chatbot_response,
+         inputs=gr.Audio(source="microphone", type="filepath"),
+         outputs=[gr.Audio(type="numpy"), "text"],
+         live=True
+     ).launch()
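
To try the new voice flow locally, something like the following should work (assuming transformers, bitsandbytes, accelerate, and gradio are installed, and a CUDA GPU is available for the 4-bit model):

    python app.py --model_name mistralai/Mistral-7B-Instruct-v0.2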