Update main.py

main.py CHANGED
@@ -1,15 +1,17 @@
 from google.colab import drive
 drive.mount('/content/drive')
 
-"""Install Dependencies
+"""Install Dependencies"""
 
 pip install transformers librosa torch soundfile numba numpy TTS datasets gradio protobuf==3.20.3
 
-"""Emotion Detection (Using Text Dataset)
+"""Emotion Detection (Using Text Dataset)
+
 """
+
 !pip install --upgrade numpy tensorflow transformers TTS
 
-!pip freeze > requirements.txt
+!pip freeze > requirements.txt
 
 from transformers import pipeline
 
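Note: line 6 still runs `pip install ...` without the `!` prefix, which is a syntax error in a plain Python file (a Colab-export artifact this commit does not fix). For reference, a minimal sketch of the emotion-detection step this file builds on; the checkpoint name is an assumption, since the diff never shows which model is loaded:

from transformers import pipeline

# Hypothetical checkpoint; the commit does not show the model actually used.
emotion_classifier = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
)

result = emotion_classifier("Welcome to the smart library!")[0]
emotion, confidence = result["label"], result["score"]
print(f"Detected Emotion: {emotion} (Confidence: {confidence:.2f})")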
@@ -51,6 +53,27 @@ def generate_emotional_speech(text, emotion):
         "shame": {"pitch": 0.8, "speed": 0.85},  # Quiet, subdued tone
 
     }
+
+    import librosa
+    import soundfile as sf
+
+    def adjust_pitch(audio_path, pitch_factor):
+        # Load audio
+        y, sr = librosa.load(audio_path)
+        # Adjust pitch
+        y_shifted = librosa.effects.pitch_shift(y, sr, n_steps=pitch_factor)
+        # Save adjusted audio
+        sf.write(audio_path, y_shifted, sr)
+
+    def adjust_speed(audio_path, speed_factor):
+        # Load the audio file
+        y, sr = librosa.load(audio_path)
+
+        # Adjust the speed (this alters the duration of the audio)
+        y_speeded = librosa.effects.time_stretch(y, speed_factor)
+
+        # Save the adjusted audio
+        sf.write(audio_path, y_speeded, sr)
 
     # Retrieve pitch and speed based on detected emotion
     settings = emotion_settings.get(emotion, {"pitch": 1.0, "speed": 1.0})
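These are the same helpers that a later hunk deletes at old lines 253-272; the commit moves them earlier in the file rather than dropping them. One caveat: `librosa.effects.pitch_shift(y, sr, n_steps=...)` and `time_stretch(y, speed_factor)` use librosa's pre-0.10 positional signatures; from librosa 0.10 on, `sr`, `n_steps`, and `rate` are keyword-only, so these calls raise a TypeError on current installs. A version-proof sketch:

import librosa
import soundfile as sf

def adjust_pitch(audio_path, n_steps):
    # Shift pitch by n_steps semitones; keyword arguments required in librosa >= 0.10
    y, sr = librosa.load(audio_path)
    y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)
    sf.write(audio_path, y_shifted, sr)

def adjust_speed(audio_path, speed_factor):
    # rate > 1 shortens (speeds up), rate < 1 lengthens; pitch is unchanged
    y, sr = librosa.load(audio_path)
    y_stretched = librosa.effects.time_stretch(y, rate=speed_factor)
    sf.write(audio_path, y_stretched, sr)

Note also that emotion_settings stores multiplicative pitch factors (0.8, 1.2, ...), while pitch_shift expects semitone steps; a factor f corresponds to roughly 12 * log2(f) semitones.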
@@ -75,7 +98,6 @@ emotion = "happy"
 output_audio = generate_emotional_speech("Welcome to the smart library!", emotion)
 print(f"Generated Speech Saved At: {output_audio}")
 
-
 """Integrating the Workflow"""
 
 from IPython.display import Audio, display
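Since the workflow section imports IPython.display here, a one-line usage example for auditioning the result in Colab:

from IPython.display import Audio, display

display(Audio(output_audio))  # renders an inline player for the generated WAV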
@@ -250,28 +272,6 @@ save_path = "/content/drive/My Drive/fine_tuned_tacotron2.pth"
 torch.save(model.state_dict(), save_path)
 
 
-import librosa
-import soundfile as sf
-
-def adjust_pitch(audio_path, pitch_factor):
-    # Load audio
-    y, sr = librosa.load(audio_path)
-    # Adjust pitch
-    y_shifted = librosa.effects.pitch_shift(y, sr, n_steps=pitch_factor)
-    # Save adjusted audio
-    sf.write(audio_path, y_shifted, sr)
-
-def adjust_speed(audio_path, speed_factor):
-    # Load the audio file
-    y, sr = librosa.load(audio_path)
-
-    # Adjust the speed (this alters the duration of the audio)
-    y_speeded = librosa.effects.time_stretch(y, speed_factor)
-
-    # Save the adjusted audio
-    sf.write(audio_path, y_speeded, sr)
-
-
 """Set up the Gradio interface"""
 
 import gradio as gr
@@ -318,7 +318,8 @@ def emotion_aware_tts_pipeline(input_text=None, file_input=None):
 
     # Generate audio
     audio_path = "output.wav"
-    tts_model.tts_to_file(text=input_text, file_path=audio_path)
+    tts_model.tts_to_file(text=input_text, file_path=audio_path, speed=speed, pitch=pitch)
+
 
 
     return f"Detected Emotion: {emotion} (Confidence: {confidence:.2f})", audio_path
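The new call forwards `speed=speed, pitch=pitch` straight into `tts_to_file`. Coqui TTS's `TTS.api` accepts a `speed` argument for some models, but it has no documented `pitch` parameter, so this line can fail at runtime depending on the installed TTS version and model. A more defensive sketch that synthesizes plainly and post-processes with the `adjust_*` helpers defined earlier in this file (the factor-to-semitone conversion is an assumption about the intended pitch unit):

import math

tts_model.tts_to_file(text=input_text, file_path=audio_path)
adjust_speed(audio_path, speed)                  # speed is a rate factor, e.g. 1.2
adjust_pitch(audio_path, 12 * math.log2(pitch))  # convert pitch factor to semitones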
@@ -345,4 +346,4 @@ iface = gr.Interface(
 )
 
 # Launch Gradio interface
-iface.launch()
+iface.launch()
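The diff shows only the closing parenthesis of gr.Interface(...) and the launch call. For orientation, a minimal sketch of the wiring; the component choices and labels are assumptions, since the full constructor sits outside the hunk:

import gradio as gr

iface = gr.Interface(
    fn=emotion_aware_tts_pipeline,
    inputs=[gr.Textbox(label="Input Text"), gr.File(label="Or upload a text file")],
    outputs=[gr.Textbox(label="Detected Emotion"), gr.Audio(label="Generated Speech")],
    title="Emotion-Aware TTS",
)

iface.launch()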