Anita-19 committed on
Commit 15dbab2 · verified · 1 Parent(s): 5a24114

Update main.py

Files changed (1)
  1. main.py +31 -51
main.py CHANGED
@@ -3,15 +3,15 @@ drive.mount('/content/drive')
 
 """Install Dependencies"""
 
- #pip install transformers librosa torch soundfile numba numpy TTS datasets gradio protobuf==3.20.3
+ pip install transformers librosa torch soundfile numba numpy TTS datasets gradio protobuf==3.20.3
 
 """Emotion Detection (Using Text Dataset)
 
 """
 
- #!pip install --upgrade numpy tensorflow transformers TTS
+ !pip install --upgrade numpy tensorflow transformers TTS
 
- #!pip freeze > requirements.txt
+ !pip freeze > requirements.txt
 
 from transformers import pipeline
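Review note: as committed, the uncommented bare `pip install ...` line is a shell command sitting in main.py, so the module now raises a SyntaxError when run as a plain Python script; the `!pip`/`%pip` forms only work inside a notebook cell. A script-safe alternative, sketched here and not part of the commit:

    import subprocess
    import sys

    # Install dependencies via the running interpreter's pip, which works in
    # scripts and notebooks alike.
    subprocess.check_call([sys.executable, "-m", "pip", "install",
                           "transformers", "librosa", "torch", "soundfile",
                           "TTS", "datasets", "gradio", "protobuf==3.20.3"])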
 
@@ -43,21 +43,26 @@ tts_model = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC")
 def generate_emotional_speech(text, emotion):
     # Map emotion to voice modulation parameters (pitch, speed)
     emotion_settings = {
-         "neutral": {"pitch": 1.0, "speed": 1.0},  # Baseline conversational tone
-         "joy": {"pitch": 1.3, "speed": 1.2},  # Upbeat and energetic
-         "sadness": {"pitch": 0.8, "speed": 0.9},  # Subdued, slow tone
-         "anger": {"pitch": 1.6, "speed": 1.4},  # Intense and sharp
-         "fear": {"pitch": 1.2, "speed": 0.95},  # Tense and slightly slow
-         "surprise": {"pitch": 1.5, "speed": 1.3},  # Excitement with high pitch and fast speech
-         "disgust": {"pitch": 0.9, "speed": 0.95},  # Low and deliberate
-         "shame": {"pitch": 0.8, "speed": 0.85},  # Quiet, subdued tone
-     }
-
+         "happy": {"pitch": 1.3, "speed": 1.2},  # Upbeat and energetic
+         "joy": {"pitch": 1.2, "speed": 1.1},  # Less exaggerated than 'happy'
+         "surprise": {"pitch": 1.5, "speed": 1.3},  # Excitement with high pitch and fast speech
+         "sad": {"pitch": 0.8, "speed": 0.9},  # Subdued, slow tone
+         "angry": {"pitch": 1.6, "speed": 1.4},  # Intense and sharp
+         "fear": {"pitch": 1.2, "speed": 0.95},  # Tense and slightly slow
+         "disgust": {"pitch": 0.9, "speed": 0.95},  # Low and deliberate
+         "shame": {"pitch": 0.8, "speed": 0.85},  # Quiet, subdued tone
+         "neutral": {"pitch": 1.0, "speed": 1.0},  # Baseline conversational tone
+     }
+
+
+
     # Retrieve pitch and speed based on detected emotion
     settings = emotion_settings.get(emotion, {"pitch": 1.0, "speed": 1.0})
-     # Generate speech with the TTS model, instead of directly passing speed and pitch to tts_to_file,
+     # Generate speech with the TTS model
+     # Instead of directly passing speed and pitch to tts_to_file,
     # We adjust the text to simulate the effect. This is a temporary solution.
-     # You might need to fine-tune these adjustments or consider a different TTS library with better control over speech parameters.
+     # You might need to fine-tune these adjustments or consider a different TTS library
+     # with better control over speech parameters.
     adjusted_text = text
     if settings['speed'] > 1.0:
        adjusted_text = adjusted_text.replace(" ", ".") # Simulate faster speech
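The punctuation trick above changes the text the model actually pronounces, so it only crudely approximates the pitch/speed settings. A more faithful option, keeping the committed Tacotron2 model, is to post-process the generated waveform; a minimal sketch with librosa (the helper name `apply_prosody` is illustrative, not from the commit):

    import librosa
    import numpy as np
    import soundfile as sf

    def apply_prosody(audio_path, pitch=1.0, speed=1.0):
        y, sr = librosa.load(audio_path)
        # Convert the multiplicative pitch factor to semitones for librosa
        n_steps = 12 * np.log2(pitch)
        y = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)
        # rate > 1.0 shortens the audio (faster speech) without changing pitch
        y = librosa.effects.time_stretch(y, rate=speed)
        sf.write(audio_path, y, sr)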
@@ -68,7 +73,6 @@ def generate_emotional_speech(text, emotion):
     audio_path = "output.wav" # Or any desired filename
     tts_model.tts_to_file(text=adjusted_text, file_path=audio_path) # Pass file_path argument
     return audio_path
-
 
 # Example usage
 emotion = "happy"
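A small but meaningful side effect of the key renames in this hunk: before the commit, the example's `emotion = "happy"` matched no key in the table and silently fell back to the neutral defaults; with the new keys the lookup resolves:

    emotion_settings.get("happy", {"pitch": 1.0, "speed": 1.0})
    # before this commit: {"pitch": 1.0, "speed": 1.0} (fallback)
    # after this commit:  {"pitch": 1.3, "speed": 1.2}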
@@ -248,19 +252,7 @@ save_path = "/content/drive/My Drive/fine_tuned_tacotron2.pth"
 # Save the model's state dictionary using torch.save
 torch.save(model.state_dict(), save_path)
 
-
 """Set up the Gradio interface"""
- import librosa
- import soundfile as sf
-
- def adjust_pitch(audio_path, pitch_factor):
-     # Load audio
-     y, sr = librosa.load(audio_path)
-     # Adjust pitch
-     y_shifted = librosa.effects.pitch_shift(y, sr, n_steps=pitch_factor)
-     # Save adjusted audio
-     sf.write(audio_path, y_shifted, sr)
-
 
 import gradio as gr
 from transformers import pipeline
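For anyone restoring the deleted `adjust_pitch` helper: its `pitch_shift` call passes `sr` positionally, which recent librosa releases reject (from librosa 0.10 the parameters after the waveform are keyword-only). Assuming librosa >= 0.10, the call would need to be:

    # Keyword arguments are required on librosa >= 0.10
    y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=pitch_factor)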
@@ -274,17 +266,14 @@ tts_model = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC")
 
 # Emotion-specific settings for pitch and speed
 emotion_settings = {
-     "neutral": {"pitch": 1.0, "speed": 1.0},
-     "joy": {"pitch": 1.3, "speed": 1.2},
+     "joy": {"pitch": 1.2, "speed": 1.1},
     "sadness": {"pitch": 0.8, "speed": 0.9},
-     "anger": {"pitch": 1.6, "speed": 1.4},
-     "fear": {"pitch": 1.2, "speed": 0.95},
-     "surprise": {"pitch": 1.5, "speed": 1.3},
-     "disgust": {"pitch": 0.9, "speed": 0.95},
-     "shame": {"pitch": 0.8, "speed": 0.85},
+     "anger": {"pitch": 1.0, "speed": 1.2},
+     "fear": {"pitch": 0.9, "speed": 1.0},
+     "surprise": {"pitch": 1.3, "speed": 1.2},
+     "neutral": {"pitch": 1.0, "speed": 1.0},
 }
 
-
 # Function to process text or file input and generate audio
 def emotion_aware_tts_pipeline(input_text=None, file_input=None):
     try:
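After this commit the file carries two `emotion_settings` tables that disagree: `generate_emotional_speech` still maps anger to pitch 1.6 / speed 1.4 while this table says 1.0 / 1.2, and "disgust"/"shame" are dropped here. A single shared table read by both code paths would prevent the drift; a sketch, with illustrative names:

    # One source of truth for prosody settings, read by both code paths
    EMOTION_SETTINGS = {
        "joy": {"pitch": 1.2, "speed": 1.1},
        "sadness": {"pitch": 0.8, "speed": 0.9},
        "anger": {"pitch": 1.0, "speed": 1.2},
        "fear": {"pitch": 0.9, "speed": 1.0},
        "surprise": {"pitch": 1.3, "speed": 1.2},
        "neutral": {"pitch": 1.0, "speed": 1.0},
    }

    def prosody_for(emotion):
        # Fall back to neutral for unknown classifier labels
        return EMOTION_SETTINGS.get(emotion.lower(), EMOTION_SETTINGS["neutral"])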
@@ -299,33 +288,24 @@ def emotion_aware_tts_pipeline(input_text=None, file_input=None):
             emotion = emotion_data['label']
             confidence = emotion_data['score']
 
-             # Adjust text for speed
+             # Adjust pitch and speed
             settings = emotion_settings.get(emotion.lower(), {"pitch": 1.0, "speed": 1.0})
-             speed = settings["speed"]
             pitch = settings["pitch"]
-
-             if speed > 1.0:
-                 input_text = input_text.replace(" ", ". ") # Faster speech simulation
-             elif speed < 1.0:
-                 input_text = input_text.replace(" ", "... ") # Slower speech simulation
+             speed = settings["speed"]
 
             # Generate audio
             audio_path = "output.wav"
-             tts_model.tts_to_file(text=input_text, file_path=audio_path)
-
-             # Adjust pitch
-             pitch_factor = (pitch - 1.0) * 12 # Convert to semitones for librosa
-             adjust_pitch(audio_path, pitch_factor)
+             tts_model.tts_to_file(text=input_text, file_path=audio_path, speed=speed, pitch=pitch)
 
             return f"Detected Emotion: {emotion} (Confidence: {confidence:.2f})", audio_path
         else:
             return "Please provide input text or file", None
     except Exception as e:
+         # Return error message if something goes wrong
        return f"Error: {str(e)}", None
 
-
 # Define Gradio interface
- interface = gr.Interface(
+ iface = gr.Interface(
     fn=emotion_aware_tts_pipeline,
     inputs=[
         gr.Textbox(label="Input Text", placeholder="Enter text here"),
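One caution on the new call in this hunk: Coqui TTS's high-level API does expose a `speed` argument on `tts_to_file` for models that support it, but `pitch` is not a documented parameter, so depending on the installed TTS version the call may raise a TypeError or silently ignore it. A defensive sketch that only forwards the kwargs this installation actually names:

    import inspect

    # Forward prosody kwargs only when the installed TTS version names them;
    # unsupported ones (e.g. pitch) are dropped instead of crashing the call.
    accepted = inspect.signature(tts_model.tts_to_file).parameters
    kwargs = {k: v for k, v in {"speed": speed, "pitch": pitch}.items()
              if k in accepted}
    tts_model.tts_to_file(text=input_text, file_path=audio_path, **kwargs)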
@@ -340,4 +320,4 @@ interface = gr.Interface(
 )
 
 # Launch Gradio interface
- interface.launch()
+ iface.launch()