syedmudassir16 committed on
Commit f8d538d · verified · 1 Parent(s): fb5cd3b

Update app.py

Files changed (1)
  1. app.py +103 -94
app.py CHANGED
@@ -1,110 +1,119 @@
-import os
 import gradio as gr
-import whisper
-from gtts import gTTS
-import io
-from groq import Groq
-
-# Initialize the Groq client
-client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
-
-# Load the Whisper model
-model = whisper.load_model("base")
-
-def process_audio(file_path):
-    try:
-        # Load the audio file
-        audio = whisper.load_audio(file_path)
-
-        # Transcribe the audio using Whisper
-        result = model.transcribe(audio)
-        text = result["text"]
-
-        # Generate a response using Groq
-        chat_completion = client.chat.completions.create(
-            messages=[{"role": "user", "content": text}],
-            model="llama3-8b-8192",  # Replace with the correct model if necessary
-        )
-
-        # Access the response using dot notation
-        response_message = chat_completion.choices[0].message.content.strip()
-
-        # Convert the response text to speech
-        tts = gTTS(response_message)
-        response_audio_io = io.BytesIO()
-        tts.write_to_fp(response_audio_io)  # Save the audio to the BytesIO object
-        response_audio_io.seek(0)
-
-        # Save audio to a file to ensure it's generated correctly
-        response_audio_path = "response.mp3"
-        with open(response_audio_path, "wb") as audio_file:
-            audio_file.write(response_audio_io.getvalue())
-
-        # Return the response text and the path to the saved audio file
-        return response_message, response_audio_path
-
-    except Exception as e:
-        return f"An error occurred: {e}", None
-
-# Create the Gradio interface with customized UI
-with gr.Blocks() as demo:
-    gr.Markdown(
-        """
-        <style>
-        .gradio-container {
-            font-family: Arial, sans-serif;
-            background-color: #f0f4c3; /* Light green background color */
-            border-radius: 10px;
-            padding: 20px;
-            box-shadow: 0 4px 12px rgba(0,0,0,0.2);
-            text-align: center;
-        }
-        .gradio-input, .gradio-output {
-            border-radius: 6px;
-            border: 1px solid #ddd;
-            padding: 10px;
-        }
-        .gradio-button {
-            background-color: #ff7043;
-            color: white;
-            border-radius: 6px;
-            border: none;
-            padding: 10px 20px; /* Adjusted padding */
-            font-size: 16px; /* Adjusted font size */
-            cursor: pointer;
-        }
-        .gradio-button:hover {
-            background-color: #e64a19;
-        }
-        .gradio-title {
-            font-size: 28px;
-            font-weight: bold;
-            margin-bottom: 20px;
-            color: #37474f;
-        }
-        .gradio-description {
-            font-size: 16px;
-            margin-bottom: 20px;
-            color: #616161;
-        }
-        </style>
-        """
-    )
-
-    gr.Markdown("# Voice-to-Voice Chatbot\nDeveloped by Salman Maqbool ❤️")
-    gr.Markdown("Upload an audio file to interact with the voice-to-voice chatbot. The chatbot will transcribe the audio, generate a response, and provide a spoken reply.")
-
-    with gr.Row():
-        with gr.Column():
-            audio_input = gr.Audio(type="filepath", label="Upload Audio File")
-            submit_button = gr.Button("Submit")
-
-        with gr.Column():
-            response_text = gr.Textbox(label="Response Text", placeholder="The AI-generated response will appear here", lines=5)
-            response_audio = gr.Audio(label="Response Audio", type="filepath")
-
-    # Link the submit button to the process_audio function
-    submit_button.click(fn=process_audio, inputs=audio_input, outputs=[response_text, response_audio])
-
-# Launch the Gradio app
-demo.launch()
+from huggingface_hub import InferenceClient
+from transformers import pipeline
+import edge_tts
+import tempfile
+import asyncio
+
+# Initialize the inference client with your Hugging Face token
+client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
+# Initialize the ASR pipeline
+asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
+
+def speech_to_text(speech):
+    """Converts speech to text using the ASR pipeline."""
+    return asr(speech)["text"]
+
+def classify_mood(input_string):
+    """Classifies the mood based on keywords in the input string."""
+    input_string = input_string.lower()
+    mood_words = {"happy", "sad", "instrumental", "party"}
+    for word in mood_words:
+        if word in input_string:
+            return word, True
+    return None, False
+
+def generate(prompt, history, temperature=0.1, max_new_tokens=2048, top_p=0.8, repetition_penalty=1.0):
+    temperature = float(temperature)
+    if temperature < 1e-2:
+        temperature = 1e-2
+    top_p = float(top_p)
+
+    generate_kwargs = dict(
+        temperature=temperature,
+        max_new_tokens=max_new_tokens,
+        top_p=top_p,
+        repetition_penalty=repetition_penalty,
+        do_sample=True,
+        seed=42,
+    )
+
+    formatted_prompt = format_prompt(prompt, history)
+
+    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
+    output = ""
+
+    for response in stream:
+        output += response.token.text
+        mood, is_classified = classify_mood(output)
+        if is_classified:
+            playlist_message = f"Playing {mood.capitalize()} playlist for you!"
+            return playlist_message
+    return output
+
+def format_prompt(message, history):
+    """Formats the prompt including fixed instructions and conversation history."""
+    fixed_prompt = """
+    You are a smart mood analyser, who determines user mood. Based on the user input, classify the mood of the user into one of the four moods {Happy, Sad, Instrumental, Party}. If you are finding it difficult to classify into one of these four moods, keep the conversation going on until we classify the user's mood. Return a single-word reply from one of the options if you have classified. Suppose you classify a sentence as happy, then just respond with "happy".
+
+    Note: Do not write anything else other than the classified mood if classified.
+
+    Note: If any question or any user text cannot be classified, follow up with a question to know the user's mood until you classify the mood.
+
+    Note: Mood should be classified only from any of these 4 classes {Happy, Sad, Instrumental, Party}, if not any of these 4 then continue with a follow-up question until you classify the mood.
+
+    Note: if user asks something like i need a coffee then do not classify the mood directly and ask more follow-up questions as asked in examples.
+
+    [Examples omitted for brevity]
+    """
+    prompt = f"{fixed_prompt}"
+    for user_prompt, bot_response in history:
+        prompt += f"\nUser: {user_prompt}\nLLM Response: {bot_response}"
+    prompt += f"\nUser: {message}\nLLM Response:"
+    return prompt
+
+async def text_to_speech(text):
+    communicate = edge_tts.Communicate(text)
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
+        tmp_path = tmp_file.name
+        await communicate.save(tmp_path)
+    return tmp_path
+
+def chatbot(audio, history):
+    if audio is None:
+        return "", history
+
+    text = speech_to_text(audio)
+    response = generate(text, history)
+    history.append((text, response))
+
+    return "", history
+
+def text_input(text, history):
+    response = generate(text, history)
+    history.append((text, response))
+    return "", history
+
+async def generate_audio(history):
+    if len(history) > 0:
+        last_response = history[-1][1]
+        audio_path = await text_to_speech(last_response)
+        return audio_path
+    return None
+
+# Gradio interface setup
+with gr.Blocks() as demo:
+    gr.Markdown("# Mood-Based Music Recommender with Voice Chat")
+
+    chatbot = gr.Chatbot()
+    audio_input = gr.Audio(source="microphone", type="filepath")
+    text_input = gr.Textbox(placeholder="Type your message here...")
+    audio_output = gr.Audio(label="AI Response")
+
+    audio_input.change(chatbot, inputs=[audio_input, chatbot], outputs=[audio_input, chatbot])
+    text_input.submit(text_input, inputs=[text_input, chatbot], outputs=[text_input, chatbot])
+
+    chatbot.change(generate_audio, inputs=[chatbot], outputs=[audio_output])
+
+if __name__ == "__main__":
+    demo.launch()
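
One detail worth flagging in the new gr.Blocks() section: the component variables chatbot and text_input reuse the names of the handler functions defined earlier in the file, so by the time audio_input.change(chatbot, ...) and text_input.submit(text_input, ...) run, the first argument is a Gradio component rather than a callable. Below is a minimal sketch (not part of this commit) of how the same wiring could look with distinct names; chatbot_ui, audio_in, text_box, and audio_out are hypothetical names, and the snippet assumes the handler functions from the new app.py (chatbot, text_input, generate_audio) are defined above it in the same file.

# Sketch only: same layout as the committed Blocks section, but the UI
# components get names that do not shadow the handler functions.
with gr.Blocks() as demo:
    gr.Markdown("# Mood-Based Music Recommender with Voice Chat")

    chatbot_ui = gr.Chatbot()
    # source= is the Gradio 3.x keyword; Gradio 4.x renamed it to sources=["microphone"].
    audio_in = gr.Audio(source="microphone", type="filepath")
    text_box = gr.Textbox(placeholder="Type your message here...")
    audio_out = gr.Audio(label="AI Response")

    # Pass the handler functions (not the components) as the event callbacks.
    audio_in.change(chatbot, inputs=[audio_in, chatbot_ui], outputs=[audio_in, chatbot_ui])
    text_box.submit(text_input, inputs=[text_box, chatbot_ui], outputs=[text_box, chatbot_ui])
    chatbot_ui.change(generate_audio, inputs=[chatbot_ui], outputs=[audio_out])

if __name__ == "__main__":
    demo.launch()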