Spaces:

Anita-19
/

emotion-aware-tts

Running

App Files Community

Anita-19 committed on Jan 23

Commit

d582602

verified ·

1 Parent(s): 8e5bf2a

Update main.py

Browse files

Files changed (1) hide show

main.py +33 -30

main.py CHANGED Viewed

@@ -1,12 +1,13 @@
 from google.colab import drive
 drive.mount('/content/drive')
-#Install Dependencies"""
-"""
 pip install transformers librosa torch soundfile numba numpy TTS datasets gradio protobuf==3.20.3
-#Emotion Detection (Using Text Dataset)
 !pip install --upgrade numpy tensorflow transformers TTS
@@ -28,7 +29,7 @@ text = "I am feeling excited today!"
 emotion, confidence = detect_emotion(text)
 print(f"Detected Emotion: {emotion}, Confidence: {confidence}")
-#Emotion-Aware TTS (Using Tacotron 2 or Similar)"""
 import torch
 import librosa
@@ -52,27 +53,6 @@ def generate_emotional_speech(text, emotion):
     "shame": {"pitch": 0.8, "speed": 0.85},      # Quiet, subdued tone
 }
-import librosa
-import soundfile as sf
-def adjust_pitch(audio_path, pitch_factor):
-    # Load audio
-    y, sr = librosa.load(audio_path)
-    # Adjust pitch
-    y_shifted = librosa.effects.pitch_shift(y, sr, n_steps=pitch_factor)
-    # Save adjusted audio
-    sf.write(audio_path, y_shifted, sr)
-def adjust_speed(audio_path, speed_factor):
-    # Load the audio file
-    y, sr = librosa.load(audio_path)
-    # Adjust the speed (this alters the duration of the audio)
-    y_speeded = librosa.effects.time_stretch(y, speed_factor)
-    # Save the adjusted audio
-    sf.write(audio_path, y_speeded, sr)
     # Retrieve pitch and speed based on detected emotion
     settings = emotion_settings.get(emotion, {"pitch": 1.0, "speed": 1.0})
@@ -97,6 +77,29 @@ emotion = "happy"
 output_audio = generate_emotional_speech("Welcome to the smart library!", emotion)
 print(f"Generated Speech Saved At: {output_audio}")
 """Integrating the Workflow"""
 from IPython.display import Audio, display
@@ -213,7 +216,7 @@ tokenizer.save_pretrained(tokenizer_save_path)
 print("Model and tokenizer saved to Google Drive.")
-#Reload the Fine-Tuned Model"""
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
@@ -231,7 +234,7 @@ tokenizer = AutoTokenizer.from_pretrained(tokenizer_save_path)
 print("Fine-tuned model and tokenizer loaded successfully.")
-#Test the Reloaded Model"""
 from transformers import pipeline
@@ -243,7 +246,7 @@ text = "I feel so upset today!"
 result = emotion_classifier(text)
 print(result)
-#Fine-tuning the TTS System"""
 from TTS.api import TTS
 from TTS.utils.audio import AudioProcessor
@@ -271,7 +274,7 @@ save_path = "/content/drive/My Drive/fine_tuned_tacotron2.pth"
 torch.save(model.state_dict(), save_path)
-#Set up the Gradio interface
 import gradio as gr
 from transformers import pipeline
@@ -345,4 +348,4 @@ iface = gr.Interface(
 )
 # Launch Gradio interface
-iface.launch()

 from google.colab import drive
 drive.mount('/content/drive')
+"""Install Dependencies
 pip install transformers librosa torch soundfile numba numpy TTS datasets gradio protobuf==3.20.3
+"""Emotion Detection (Using Text Dataset)
+"""
 !pip install --upgrade numpy tensorflow transformers TTS
 emotion, confidence = detect_emotion(text)
 print(f"Detected Emotion: {emotion}, Confidence: {confidence}")
+"""Emotion-Aware TTS (Using Tacotron 2 or Similar)"""
 import torch
 import librosa
     "shame": {"pitch": 0.8, "speed": 0.85},      # Quiet, subdued tone
 }
     # Retrieve pitch and speed based on detected emotion
     settings = emotion_settings.get(emotion, {"pitch": 1.0, "speed": 1.0})
 output_audio = generate_emotional_speech("Welcome to the smart library!", emotion)
 print(f"Generated Speech Saved At: {output_audio}")
+#
+import librosa
+import soundfile as sf
+def adjust_pitch(audio_path, pitch_factor):
+    """Pitch-shift the audio file at ``audio_path`` and overwrite it in place.
+
+    Args:
+        audio_path: Path of the audio file that is read and then rewritten.
+        pitch_factor: Forwarded to librosa as ``n_steps``, so it is
+            interpreted as a shift in semitones (fractional values are
+            allowed), not as a frequency ratio.
+    """
+    # librosa.load resamples to its default rate because no ``sr`` is given;
+    # the returned ``sr`` is the rate of the samples in ``y``.
+    y, sr = librosa.load(audio_path)
+    # ``sr`` must be passed by keyword: librosa >= 0.10 makes every argument
+    # after ``y`` keyword-only, and older releases accept the keyword too.
+    y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=pitch_factor)
+    # Write back over the input file at the rate the samples were loaded at.
+    sf.write(audio_path, y_shifted, sr)
+def adjust_speed(audio_path, speed_factor):
+    """Time-stretch the audio file at ``audio_path`` and overwrite it in place.
+
+    Args:
+        audio_path: Path of the audio file that is read and then rewritten.
+        speed_factor: Forwarded to librosa as ``rate``; values above 1.0
+            speed the audio up (shorter duration), values below 1.0 slow
+            it down.
+    """
+    # librosa.load resamples to its default rate because no ``sr`` is given;
+    # the returned ``sr`` is the rate of the samples in ``y``.
+    y, sr = librosa.load(audio_path)
+    # ``rate`` must be passed by keyword: librosa >= 0.10 makes it
+    # keyword-only, and older releases accept the keyword too.
+    y_speeded = librosa.effects.time_stretch(y, rate=speed_factor)
+    # Write back over the input file; the sample rate is unchanged, only the
+    # number of samples (and therefore the duration) differs.
+    sf.write(audio_path, y_speeded, sr)
 """Integrating the Workflow"""
 from IPython.display import Audio, display
 print("Model and tokenizer saved to Google Drive.")
+"""Reload the Fine-Tuned Model"""
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 print("Fine-tuned model and tokenizer loaded successfully.")
+"""Test the Reloaded Model"""
 from transformers import pipeline
 result = emotion_classifier(text)
 print(result)
+"""Fine-tuning the TTS System"""
 from TTS.api import TTS
 from TTS.utils.audio import AudioProcessor
 torch.save(model.state_dict(), save_path)
+"""Set up the Gradio interface"""
 import gradio as gr
 from transformers import pipeline
 )
 # Launch Gradio interface
+iface.launch()