Anita-19 committed · verified
Commit 47f9e3c · 1 Parent(s): 70c274a

Update main.py

Files changed (1):
  1. main.py (+29 −28)
main.py CHANGED
@@ -1,15 +1,17 @@
 from google.colab import drive
 drive.mount('/content/drive')

-"""Install Dependencies
+"""Install Dependencies"""

 pip install transformers librosa torch soundfile numba numpy TTS datasets gradio protobuf==3.20.3

-"""Emotion Detection (Using Text Dataset)""""
+"""Emotion Detection (Using Text Dataset)
+
 """
+
 !pip install --upgrade numpy tensorflow transformers TTS

-!pip freeze > requirements.txt"""
+!pip freeze > requirements.txt

 from transformers import pipeline
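Note that once `"""Install Dependencies"""` is closed, the bare `pip install ...` line below it becomes live code. That syntax only works in a notebook cell, where IPython treats `pip`/`!pip` as magics; in a plain `.py` script it is a SyntaxError. A minimal script-safe sketch, assuming the same package list as the commit:

```python
import subprocess
import sys

# Install the project's dependencies from a regular Python script
# (equivalent to the notebook's `pip install ...` magic line).
subprocess.check_call([
    sys.executable, "-m", "pip", "install",
    "transformers", "librosa", "torch", "soundfile", "numba",
    "numpy", "TTS", "datasets", "gradio", "protobuf==3.20.3",
])
```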
@@ -51,6 +53,27 @@ def generate_emotional_speech(text, emotion):
         "shame": {"pitch": 0.8, "speed": 0.85},  # Quiet, subdued tone

     }
+
+import librosa
+import soundfile as sf
+
+def adjust_pitch(audio_path, pitch_factor):
+    # Load audio
+    y, sr = librosa.load(audio_path)
+    # Adjust pitch
+    y_shifted = librosa.effects.pitch_shift(y, sr, n_steps=pitch_factor)
+    # Save adjusted audio
+    sf.write(audio_path, y_shifted, sr)
+
+def adjust_speed(audio_path, speed_factor):
+    # Load the audio file
+    y, sr = librosa.load(audio_path)
+
+    # Adjust the speed (this alters the duration of the audio)
+    y_speeded = librosa.effects.time_stretch(y, speed_factor)
+
+    # Save the adjusted audio
+    sf.write(audio_path, y_speeded, sr)

     # Retrieve pitch and speed based on detected emotion
     settings = emotion_settings.get(emotion, {"pitch": 1.0, "speed": 1.0})
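As moved, the helpers land inside `generate_emotional_speech`, between the `emotion_settings` dict and the code that reads it, so their indentation has to agree with the function body (or they are better left at module level, where they came from). Separately, both call the librosa effects positionally; in librosa ≥ 0.10 the arguments after the signal are keyword-only, so `pitch_shift(y, sr, n_steps=...)` and `time_stretch(y, speed_factor)` raise a TypeError. A version-safe sketch of the same helpers:

```python
import librosa
import soundfile as sf

def adjust_pitch(audio_path, pitch_factor):
    # Shift pitch by `pitch_factor` semitones without changing duration.
    y, sr = librosa.load(audio_path)
    y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=pitch_factor)
    sf.write(audio_path, y_shifted, sr)

def adjust_speed(audio_path, speed_factor):
    # Time-stretch by `speed_factor` (>1 is faster) without changing pitch.
    y, sr = librosa.load(audio_path)
    y_speeded = librosa.effects.time_stretch(y, rate=speed_factor)
    sf.write(audio_path, y_speeded, sr)
```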
@@ -75,7 +98,6 @@ emotion = "happy"
 output_audio = generate_emotional_speech("Welcome to the smart library!", emotion)
 print(f"Generated Speech Saved At: {output_audio}")

-
 """Integrating the Workflow"""

 from IPython.display import Audio, display
@@ -250,28 +272,6 @@ save_path = "/content/drive/My Drive/fine_tuned_tacotron2.pth"
 torch.save(model.state_dict(), save_path)


-import librosa
-import soundfile as sf
-
-def adjust_pitch(audio_path, pitch_factor):
-    # Load audio
-    y, sr = librosa.load(audio_path)
-    # Adjust pitch
-    y_shifted = librosa.effects.pitch_shift(y, sr, n_steps=pitch_factor)
-    # Save adjusted audio
-    sf.write(audio_path, y_shifted, sr)
-
-def adjust_speed(audio_path, speed_factor):
-    # Load the audio file
-    y, sr = librosa.load(audio_path)
-
-    # Adjust the speed (this alters the duration of the audio)
-    y_speeded = librosa.effects.time_stretch(y, speed_factor)
-
-    # Save the adjusted audio
-    sf.write(audio_path, y_speeded, sr)
-
-
 """Set up the Gradio interface"""

 import gradio as gr
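The context above this hunk saves only the state dict, so reusing the fine-tuned Tacotron2 weights later means rebuilding the model and loading the dict back in. A minimal sketch with a hypothetical `load_finetuned` helper, assuming the model is constructed the same way it was before saving:

```python
import torch

def load_finetuned(model, save_path="/content/drive/My Drive/fine_tuned_tacotron2.pth"):
    # Restore fine-tuned weights into an already-constructed model;
    # the architecture must match the one that produced the checkpoint.
    state_dict = torch.load(save_path, map_location="cpu")
    model.load_state_dict(state_dict)
    model.eval()  # switch to inference mode for synthesis
    return model
```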
@@ -318,7 +318,8 @@ def emotion_aware_tts_pipeline(input_text=None, file_input=None):

     # Generate audio
     audio_path = "output.wav"
-    tts_model.tts_to_file(text=input_text, file_path=audio_path)
+    tts_model.tts_to_file(text=input_text, file_path=audio_path, speed=speed, pitch=pitch)
+


     return f"Detected Emotion: {emotion} (Confidence: {confidence:.2f})", audio_path
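One caveat on the new line: Coqui TTS's `tts_to_file` does accept a `speed` argument for models that support it, but `pitch` is not part of its documented signature; unexpected keyword arguments are forwarded to the underlying model and may be ignored or raise a TypeError. A more robust sketch, assuming the intent is to apply the emotion's pitch setting, is to synthesize first and pitch-shift as post-processing (the helper name and the ratio-to-semitones mapping are assumptions, not the commit's code):

```python
import math
import librosa
import soundfile as sf

def synthesize_with_emotion(tts_model, text, settings, audio_path="output.wav"):
    # `speed` is a documented tts_to_file argument; `pitch` is not,
    # so the pitch setting is applied as a post-processing step.
    tts_model.tts_to_file(text=text, file_path=audio_path, speed=settings["speed"])

    # Convert the multiplicative pitch factor (e.g. 1.2) into semitones,
    # since librosa's pitch_shift expects steps, not a ratio.
    n_steps = 12 * math.log2(settings["pitch"])
    y, sr = librosa.load(audio_path)
    y = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)
    sf.write(audio_path, y, sr)
    return audio_path
```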
@@ -345,4 +346,4 @@ iface = gr.Interface(
 )

 # Launch Gradio interface
-iface.launch()
+iface.launch()
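The final hunk re-adds `iface.launch()` unchanged, likely a whitespace or line-ending fix. For context, the `gr.Interface(...)` block it closes wraps `emotion_aware_tts_pipeline`, which returns a detection message and an audio path. A hypothetical reconstruction of that wiring, with component labels assumed:

```python
import gradio as gr

# Sketch of the interface around emotion_aware_tts_pipeline (defined
# earlier in main.py); labels and title are assumptions.
iface = gr.Interface(
    fn=emotion_aware_tts_pipeline,          # returns (message, audio_path)
    inputs=[
        gr.Textbox(label="Input Text"),     # direct text input
        gr.File(label="Upload Text File"),  # optional file input
    ],
    outputs=[
        gr.Textbox(label="Detected Emotion"),  # first return value
        gr.Audio(label="Generated Speech"),    # second return value
    ],
    title="Emotion-Aware TTS",
)

# Launch Gradio interface
iface.launch()
```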