Update main.py

main.py CHANGED
@@ -1,17 +1,16 @@
 from google.colab import drive
 drive.mount('/content/drive')
 
-
-
+#Install Dependencies"""
+"""
 pip install transformers librosa torch soundfile numba numpy TTS datasets gradio protobuf==3.20.3
 
-
+#Emotion Detection (Using Text Dataset)
 
-"""
 
 !pip install --upgrade numpy tensorflow transformers TTS
 
-!pip freeze > requirements.txt
+!pip freeze > requirements.txt"""
 
 from transformers import pipeline
 
@@ -29,7 +28,7 @@ text = "I am feeling excited today!"
 emotion, confidence = detect_emotion(text)
 print(f"Detected Emotion: {emotion}, Confidence: {confidence}")
 
-
+#Emotion-Aware TTS (Using Tacotron 2 or Similar)"""
 
 import torch
 import librosa
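
Note: this hunk calls detect_emotion without showing its body. A minimal sketch of the backend it plausibly wraps, assuming a Hugging Face text-classification pipeline (the model name below is a placeholder, not from this commit):

from transformers import pipeline

# Assumed backend for detect_emotion -- not shown in this diff.
# Any emotion-labeled text-classification checkpoint would work here.
emotion_classifier = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
)

def detect_emotion(text):
    # A single input returns a list like [{"label": "joy", "score": 0.98}]
    result = emotion_classifier(text)[0]
    return result["label"], result["score"]
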
@@ -40,17 +39,19 @@ from TTS.api import TTS  # Using Coqui TTS for simplicity
 tts_model = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC")
 
 
-
-
-
-"
-"
-"
-"
-"
-"
-}
+def generate_emotional_speech(text, emotion):
+    # Map emotion to voice modulation parameters (pitch, speed)
+    emotion_settings = {
+        "neutral": {"pitch": 1.0, "speed": 1.0},  # Baseline conversational tone
+        "joy": {"pitch": 1.3, "speed": 1.2},  # Upbeat and energetic
+        "sadness": {"pitch": 0.8, "speed": 0.9},  # Subdued, slow tone
+        "anger": {"pitch": 1.6, "speed": 1.4},  # Intense and sharp
+        "fear": {"pitch": 1.2, "speed": 0.95},  # Tense and slightly slow
+        "surprise": {"pitch": 1.5, "speed": 1.3},  # Excitement with high pitch and fast speech
+        "disgust": {"pitch": 0.9, "speed": 0.95},  # Low and deliberate
+        "shame": {"pitch": 0.8, "speed": 0.85},  # Quiet, subdued tone
 
+    }
 
 import librosa
 import soundfile as sf
@@ -73,32 +74,34 @@ def adjust_speed(audio_path, speed_factor):
     # Save the adjusted audio
     sf.write(audio_path, y_speeded, sr)
 
-
-
-
-#
-
-
-
-
-
-
-
-
-
-
+    # Retrieve pitch and speed based on detected emotion
+    settings = emotion_settings.get(emotion, {"pitch": 1.0, "speed": 1.0})
+    # Generate speech with the TTS model
+    # Instead of directly passing speed and pitch to tts_to_file,
+    # We adjust the text to simulate the effect. This is a temporary solution.
+    # You might need to fine-tune these adjustments or consider a different TTS library
+    # with better control over speech parameters.
+    adjusted_text = text
+    if settings['speed'] > 1.0:
+        adjusted_text = adjusted_text.replace(" ", ".")  # Simulate faster speech
+    elif settings['speed'] < 1.0:
+        adjusted_text = adjusted_text.replace(" ", "...")  # Simulate slower speech
+
+    # Explicitly specify the output path
+    audio_path = "output.wav"  # Or any desired filename
+    tts_model.tts_to_file(text=adjusted_text, file_path=audio_path)  # Pass file_path argument
+    return audio_path
 
-
-
+# Example usage
+emotion = "happy"
+output_audio = generate_emotional_speech("Welcome to the smart library!", emotion)
+print(f"Generated Speech Saved At: {output_audio}")
 
-
+"""Integrating the Workflow"""
 
-# Integrating Emotion Detection and TTS Pipeline
 from IPython.display import Audio, display
 
 def emotion_aware_tts_pipeline(text):
-    # Ensure the emotion_classifier is being accessed globally
     emotion, confidence = detect_emotion(text)
     print(f"Emotion Detected: {emotion} with Confidence: {confidence:.2f}")
 
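
Two notes on this hunk. First, the example sets emotion = "happy", which is not a key in emotion_settings, so the lookup silently falls back to the neutral defaults; the classifier's corresponding label is "joy". Second, the punctuation hack the comments apologize for can be avoided by post-processing the synthesized audio with librosa, which the file already imports for adjust_speed. A sketch under that assumption (the helper name is hypothetical):

import numpy as np
import librosa
import soundfile as sf

def apply_emotion_settings(audio_path, pitch, speed):
    # Hypothetical post-processing step, not part of this commit.
    y, sr = librosa.load(audio_path)
    # time_stretch changes speed without affecting pitch.
    y = librosa.effects.time_stretch(y, rate=speed)
    # pitch_shift works in semitones; convert the multiplicative factor.
    y = librosa.effects.pitch_shift(y, sr=sr, n_steps=12 * np.log2(pitch))
    sf.write(audio_path, y, sr)
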
@@ -210,7 +213,7 @@ tokenizer.save_pretrained(tokenizer_save_path)
 
 print("Model and tokenizer saved to Google Drive.")
 
-
+#Reload the Fine-Tuned Model"""
 
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 
@@ -228,7 +231,7 @@ tokenizer = AutoTokenizer.from_pretrained(tokenizer_save_path)
 
 print("Fine-tuned model and tokenizer loaded successfully.")
 
-
+#Test the Reloaded Model"""
 
 from transformers import pipeline
 
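
Note: the test in the next hunk calls emotion_classifier, which only reflects the reloaded weights if it is rebuilt around them. If the unchanged portion of main.py does not already do so, a one-line sketch, assuming model and tokenizer are the objects restored via from_pretrained above:

from transformers import pipeline

# Rebuild the inference pipeline around the reloaded weights (sketch).
emotion_classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
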
@@ -240,7 +243,7 @@ text = "I feel so upset today!"
 result = emotion_classifier(text)
 print(result)
 
-
+#Fine-tuning the TTS System"""
 
 from TTS.api import TTS
 from TTS.utils.audio import AudioProcessor
@@ -268,7 +271,7 @@ save_path = "/content/drive/My Drive/fine_tuned_tacotron2.pth"
 torch.save(model.state_dict(), save_path)
 
 
-
+#Set up the Gradio interface
 
 import gradio as gr
 from transformers import pipeline
@@ -282,18 +285,17 @@ tts_model = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC")
 
 # Emotion-specific settings for pitch and speed
 emotion_settings = {
-    "neutral": {"pitch": 1.0, "speed": 1.0
-    "joy": {"pitch": 1.3, "speed": 1.2
-    "sadness": {"pitch": 0.8, "speed": 0.9
-    "anger": {"pitch": 1.6, "speed": 1.4
-    "fear": {"pitch": 1.2, "speed": 0.95
-    "surprise": {"pitch": 1.5, "speed": 1.3
-    "disgust": {"pitch": 0.9, "speed": 0.95
-    "shame": {"pitch": 0.8, "speed": 0.85
+    "neutral": {"pitch": 1.0, "speed": 1.0},
+    "joy": {"pitch": 1.3, "speed": 1.2},
+    "sadness": {"pitch": 0.8, "speed": 0.9},
+    "anger": {"pitch": 1.6, "speed": 1.4},
+    "fear": {"pitch": 1.2, "speed": 0.95},
+    "surprise": {"pitch": 1.5, "speed": 1.3},
+    "disgust": {"pitch": 0.9, "speed": 0.95},
+    "shame": {"pitch": 0.8, "speed": 0.85},
 }
 
 
-
 # Function to process text or file input and generate audio
 def emotion_aware_tts_pipeline(input_text=None, file_input=None):
     try:
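
The diff ends mid-function: emotion_aware_tts_pipeline cuts off at the try:. For orientation, a minimal sketch of how the Gradio side of a Space like this is typically wired up; the component choices below are assumptions, not from this commit:

import gradio as gr

# Hypothetical wiring -- the Interface definition is outside this diff.
demo = gr.Interface(
    fn=emotion_aware_tts_pipeline,
    inputs=[gr.Textbox(label="Text"), gr.File(label="Text file (optional)")],
    outputs=gr.Audio(label="Emotion-aware speech"),
)
demo.launch()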