File size: 10,720 Bytes
905a08e
 
 
4ca8a7b
416cb92
905a08e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4ca8a7b
416cb92
4ca8a7b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
416cb92
4ca8a7b
 
416cb92
4ca8a7b
 
416cb92
4ca8a7b
 
416cb92
4ca8a7b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
905a08e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4ca8a7b
 
 
 
 
 
 
 
905a08e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4ca8a7b
905a08e
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
import argparse
import functools
import re

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

class llmChatbot:
    """Mood-classifying chatbot built on a 4-bit quantized causal language model.

    The model is steered with a fixed few-shot prompt that asks it to answer
    with one of four moods (Happy, Sad, Instrumental, Party) or to keep asking
    follow-up questions until it can.
    """

    def __init__(self, model_name, temperature=0.3, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
        """Load the quantized model and its tokenizer and store sampling defaults.

        Args:
            model_name: Model identifier passed to ``from_pretrained``.
            temperature: Default sampling temperature used by ``generate``.
            max_new_tokens: Default cap on the number of generated tokens.
            top_p: Default nucleus-sampling threshold.
            repetition_penalty: Default repetition penalty.
        """
        # Specify how to quantize the model
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype="float16",  # Use the string "float16" instead of torch.float16
        )

        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=quantization_config,
            device_map="auto",
        )
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

        # Set pad_token to eos_token if not already set
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        self.temperature = temperature
        self.max_new_tokens = max_new_tokens
        self.top_p = top_p
        self.repetition_penalty = repetition_penalty

    def format_prompt(self, message, history):
        """Build the full text prompt for one turn of the conversation.

        Args:
            message: The user's current message.
            history: Iterable of ``(user_prompt, bot_response)`` pairs from
                earlier turns; ``None`` is treated as an empty history.

        Returns:
            The fixed instruction/few-shot prompt, followed by the previous
            turns and the current message, ending with an open
            ``LLM Response:`` for the model to complete.
        """
        fixed_prompt = """
            You are a smart mood analyser, who determines user mood. Based on the user input, classify the mood of the user into one of the four moods {Happy, Sad, Instrumental, Party}. If you are finding it difficult to classify into one of these four moods, keep the conversation going on until we classify the user’s mood. Return a single-word reply from one of the options if you have classified. Suppose you classify a sentence as happy, then just respond with "happy".

            Note: Do not write anything else other than the classified mood if classified.

            Note: If any question or any user text cannot be classified, follow up with a question to know the user's mood until you classify the mood.

            Note: Mood should be classified only from any of these 4 classes {Happy, Sad, Instrumental, Party}, if not any of these 4 then continue with a follow-up question until you classify the mood.

            Note: if user asks something like i need a coffee then do not classify the mood directly and ask more follow-up questions as asked in examples.

            Examples
            User: What is C programming?
            LLM Response: C programming is a programming language. How are you feeling now after knowing the answer?

            User: Can I get a coffee?
            LLM Response: It sounds like you're in need of a little pick-me-up. How are you feeling right now? Are you looking for something upbeat, something to relax to, or maybe some instrumental music while you enjoy your coffee?
            User: I feel like rocking
            LLM Response: Party

            User: I'm feeling so energetic today!
            LLM Response: Happy

            User: I'm feeling down today.
            LLM Response: Sad

            User: I'm ready to have some fun tonight!
            LLM Response: Party

            User: I need some background music while I am stuck in traffic.
            LLM Response: Instrumental

            User: Hi
            LLM Response: Hi, how are you doing?

            User: Feeling okay only.
            LLM Response: Are you having a good day?
            User: I don't know
            LLM Response: Do you want to listen to some relaxing music?
            User: No
            LLM Response: How about listening to some rock and roll music?
            User: Yes
            LLM Response: Party

            User: Where do I find an encyclopedia?
            LLM Response: You can find it in any of the libraries or on the Internet. Does this answer make you happy?

            User: I need a coffee
            LLM Response: It sounds like you're in need of a little pick-me-up. How are you feeling right now? Are you looking for something upbeat, something to relax to, or maybe some instrumental music while you enjoy your coffee?

            User: I just got promoted at work!
            LLM Response: Happy

            User: Today is my birthday!
            LLM Response: Happy

            User: I won a prize in the lottery.
            LLM Response: Happy

            User: I am so excited about my vacation next week!
            LLM Response: Happy

            User: I aced my exams!
            LLM Response: Happy

            User: I had a wonderful time with my family today.
            LLM Response: Happy

            User: I just finished a great workout!
            LLM Response: Happy

            User: I am feeling really good about myself today.
            LLM Response: Happy

            User: I finally finished my project and it was a success!
            LLM Response: Happy

            User: I just heard my favorite song on the radio.
            LLM Response: Happy

            User: My pet passed away yesterday.
            LLM Response: Sad

            User: I lost my job today.
            LLM Response: Sad

            User: I'm feeling really lonely.
            LLM Response: Sad

            User: I didn't get the results I wanted.
            LLM Response: Sad

            User: I had a fight with my best friend.
            LLM Response: Sad

            User: I'm feeling really overwhelmed with everything.
            LLM Response: Sad

            User: I just got some bad news.
            LLM Response: Sad

            User: I'm missing my family.
            LLM Response: Sad

            User: I am feeling really down today.
            LLM Response: Sad

            User: Nothing seems to be going right.
            LLM Response: Sad

            User: I need some music while I study.
            LLM Response: Instrumental

            User: I want to listen to something soothing while I work.
            LLM Response: Instrumental

            User: Do you have any recommendations for background music?
            LLM Response: Instrumental

            User: I'm looking for some relaxing tunes.
            LLM Response: Instrumental

            User: I need some music to focus on my tasks.
            LLM Response: Instrumental

            User: Can you suggest some ambient music for meditation?
            LLM Response: Instrumental

            User: What's good for background music during reading?
            LLM Response: Instrumental

            User: I need some calm music to help me sleep.
            LLM Response: Instrumental

            User: I prefer instrumental music while cooking.
            LLM Response: Instrumental

            User: What's the best music to play while doing yoga?
            LLM Response: Instrumental

            User: Let's have a blast tonight!
            LLM Response: Party

            User: I'm in the mood to dance!
            LLM Response: Party

            User: I want to celebrate all night long!
            LLM Response: Party

            User: Time to hit the club!
            LLM Response: Party

            User: I feel like partying till dawn.
            LLM Response: Party

            User: Let's get this party started!
            LLM Response: Party

            User: I'm ready to party hard tonight.
            LLM Response: Party

            User: I'm in the mood for some loud music and dancing!
            LLM Response: Party

            User: Tonight's going to be epic!
            LLM Response: Party

            User: Lets turn up the music and have some fun!
            LLM Response: Party
            """

        # Start with the fixed prompt.
        # NOTE(review): the tokenizer may already prepend its own BOS token;
        # confirm the literal "<s>" here is intended.
        parts = [f"<s>{fixed_prompt}"]

        # Append the conversation history (None is treated as no history)
        for user_prompt, bot_response in history or ():
            parts.append(f"\nUser: {user_prompt}\nLLM Response: {bot_response}")

        # Add the current message, leaving the reply open for the model
        parts.append(f"\nUser: {message}\nLLM Response:")

        return "".join(parts)

    def generate(self, message, history, temperature=None, max_new_tokens=None, top_p=None, repetition_penalty=None):
        """Generate the model's reply to ``message`` given the conversation so far.

        Args:
            message: The user's current message.
            history: Iterable of ``(user_prompt, bot_response)`` pairs.
            temperature: Sampling temperature; defaults to the instance setting.
            max_new_tokens: Generation cap; defaults to the instance setting.
            top_p: Nucleus-sampling threshold; defaults to the instance setting.
            repetition_penalty: Repetition penalty; defaults to the instance setting.

        Returns:
            The newly generated text only (the prompt is not included),
            stripped of surrounding whitespace.
        """
        if temperature is None:
            temperature = self.temperature
        if max_new_tokens is None:
            max_new_tokens = self.max_new_tokens
        if top_p is None:
            top_p = self.top_p
        if repetition_penalty is None:
            repetition_penalty = self.repetition_penalty

        prompt = self.format_prompt(message, history)
        # The model is loaded with device_map="auto", so follow the device the
        # model reports instead of assuming "cuda".
        inputs = self.tokenizer(prompt, return_tensors="pt", padding=True).to(self.model.device)
        prompt_token_count = inputs["input_ids"].shape[-1]

        generate_kwargs = dict(
            temperature=temperature,
            max_new_tokens=max_new_tokens,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            do_sample=True,
            pad_token_id=self.tokenizer.pad_token_id,  # Explicitly set the pad_token_id
        )
        output_ids = self.model.generate(**inputs, **generate_kwargs)

        # Decode only the tokens produced after the prompt. Slicing the decoded
        # string by len(prompt) is unreliable: decoding skips special tokens,
        # so the decoded text need not match the prompt character-for-character.
        new_token_ids = output_ids[0][prompt_token_count:]
        return self.tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()

# The four supported moods, matched as whole words only so that e.g.
# "unhappy" or "crusade" are not mistaken for "happy" / "sad".
_MOOD_PATTERN = re.compile(r"\b(happy|sad|instrumental|party)\b")


def classify_mood(input_string):
    """Find the first supported mood word mentioned in a reply.

    Matching is case-insensitive and restricted to whole words. When several
    mood words appear, the one that occurs earliest in the text is returned,
    so the result is the same on every run.

    Args:
        input_string: Text to inspect; ``None`` or an empty string is
            treated as containing no mood.

    Returns:
        ``(mood, True)`` with the lowercase mood word when one is found,
        otherwise ``(None, False)``.
    """
    if not input_string:
        return None, False

    found = _MOOD_PATTERN.search(input_string.lower())
    if found is None:
        return None, False
    return found.group(1), True

@functools.lru_cache(maxsize=1)
def _get_asr_pipeline():
    """Create the speech-recognition pipeline once and reuse it afterwards."""
    return pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")


def speech_to_text(speech):
    """Transcribe an audio input to text.

    The recognition pipeline is built on first use and cached, so the model
    is not reloaded on every call.

    Args:
        speech: Audio input handed to the speech-recognition pipeline
            (the caller in this file passes a file path).

    Returns:
        The ``"text"`` field of the pipeline result.
    """
    return _get_asr_pipeline()(speech)["text"]

@functools.lru_cache(maxsize=1)
def _get_tts_pipeline():
    """Create the text-to-speech pipeline once and reuse it afterwards."""
    # NOTE(review): confirm this checkpoint can be loaded by the transformers
    # "text-to-speech" pipeline.
    return pipeline("text-to-speech", model="facebook/fastspeech2-en-ljspeech")


def text_to_speech(text):
    """Synthesize speech for ``text``.

    The synthesis pipeline is built on first use and cached, so the model is
    not reloaded on every call.

    Args:
        text: The text to speak.

    Returns:
        The ``"audio"`` field of the pipeline result. Any sampling rate the
        pipeline reports is not returned.
    """
    return _get_tts_pipeline()(text)["audio"]

if __name__ == "__main__":
    # Script entry point: load the model, then serve a voice-in / voice-out UI.
    parser = argparse.ArgumentParser(description="Start the Mistral chatbot application.")
    parser.add_argument("--model_name", type=str, default="mistralai/Mistral-7B-Instruct-v0.2", help="The name of the model to use.")

    args = parser.parse_args()
    model_name = args.model_name

    # Instantiate the chatbot with necessary parameters
    mistral_chatbot = llmChatbot(model_name=model_name)
    # Conversation turns as (user_text, bot_text); shared by every request
    # served by this process.
    history = []
    print("How are you doing today?")

    def chatbot_response(audio_input):
        """Handle one recording: transcribe, query the model, speak the reply.

        Returns a pair ``(audio_output, response_text)`` for the two
        interface outputs.
        """
        # The handler can be invoked without a recording (e.g. when the input
        # is cleared); there is nothing to transcribe in that case.
        if audio_input is None:
            return None, ""

        text_input = speech_to_text(audio_input)
        # An empty transcription carries no message; skip the model call and
        # keep it out of the history.
        if not text_input or not text_input.strip():
            return None, ""

        result = mistral_chatbot.generate(text_input, history)
        mood, is_classified = classify_mood(result)
        # Once a mood is recognised, reply with just the mood name
        response_text = mood.capitalize() if is_classified else result

        # NOTE(review): gr.Audio(type="numpy") outputs are usually given as a
        # (sample_rate, data) pair - confirm what text_to_speech returns.
        audio_output = text_to_speech(response_text)
        history.append((text_input, response_text))
        return audio_output, response_text

    # NOTE(review): newer Gradio releases take sources=["microphone"] instead
    # of source="microphone" - confirm against the installed version.
    gr.Interface(
        fn=chatbot_response,
        inputs=gr.Audio(source="microphone", type="filepath"),
        outputs=[gr.Audio(type="numpy"), "text"],
        live=True
    ).launch()