Anita-19 committed on
Commit
4b3b013
·
verified ·
1 Parent(s): f16c455

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +18 -26
main.py CHANGED
@@ -250,28 +250,6 @@ save_path = "/content/drive/My Drive/fine_tuned_tacotron2.pth"
250
 
251
  # Save the model's state dictionary using torch.save
252
  torch.save(model.state_dict(), save_path)
253
-
254
-
255
- import librosa
256
- import soundfile as sf
257
-
258
- def adjust_pitch(audio_path, pitch_factor):
259
- # Load audio
260
- y, sr = librosa.load(audio_path)
261
- # Adjust pitch
262
- y_shifted = librosa.effects.pitch_shift(y, sr, n_steps=pitch_factor)
263
- # Save adjusted audio
264
- sf.write(audio_path, y_shifted, sr)
265
-
266
- def adjust_speed(audio_path, speed_factor):
267
- # Load the audio file
268
- y, sr = librosa.load(audio_path)
269
-
270
- # Adjust the speed (this alters the duration of the audio)
271
- y_speeded = librosa.effects.time_stretch(y, speed_factor)
272
-
273
- # Save the adjusted audio
274
- sf.write(audio_path, y_speeded, sr)
275
 
276
 
277
  """Set up the Gradio interface"""
@@ -298,8 +276,18 @@ emotion_settings = {
298
  "shame": {"pitch": 0.8, "speed": 0.85},
299
  }
300
 
 
 
 
 
 
 
 
 
 
 
 
301
 
302
- # Function to process text or file input and generate audio
303
  def emotion_aware_tts_pipeline(input_text=None, file_input=None):
304
  try:
305
  # Get text from input or file
@@ -320,9 +308,13 @@ def emotion_aware_tts_pipeline(input_text=None, file_input=None):
320
 
321
  # Generate audio
322
  audio_path = "output.wav"
323
- tts_model.tts_to_file(text=input_text, file_path=audio_path, speed=speed, pitch=pitch)
324
-
325
 
 
 
 
 
 
326
 
327
  return f"Detected Emotion: {emotion} (Confidence: {confidence:.2f})", audio_path
328
  else:
@@ -330,7 +322,7 @@ def emotion_aware_tts_pipeline(input_text=None, file_input=None):
330
  except Exception as e:
331
  return f"Error: {str(e)}", None
332
 
333
-
334
 
335
  # Define Gradio interface
336
  iface = gr.Interface(
 
250
 
251
  # Save the model's state dictionary using torch.save
252
  torch.save(model.state_dict(), save_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
 
254
 
255
  """Set up the Gradio interface"""
 
276
  "shame": {"pitch": 0.8, "speed": 0.85},
277
  }
278
 
279
+ import soundfile as sf
280
+
281
+ def adjust_audio_speed(audio_path, speed_factor):
282
+ y, sr = librosa.load(audio_path)
283
+ y_speeded = librosa.effects.time_stretch(y, speed_factor)
284
+ sf.write(audio_path, y_speeded, sr)
285
+
286
+ def adjust_audio_pitch(audio_path, pitch_factor):
287
+ y, sr = librosa.load(audio_path)
288
+ y_shifted = librosa.effects.pitch_shift(y, sr, n_steps=pitch_factor)
289
+ sf.write(audio_path, y_shifted, sr)
290
 
 
291
  def emotion_aware_tts_pipeline(input_text=None, file_input=None):
292
  try:
293
  # Get text from input or file
 
308
 
309
  # Generate audio
310
  audio_path = "output.wav"
311
+ tts_model.tts_to_file(text=input_text, file_path=audio_path)
 
312
 
313
+ # Adjust pitch and speed using librosa
314
+ if pitch != 1.0:
315
+ adjust_audio_pitch(audio_path, pitch)
316
+ if speed != 1.0:
317
+ adjust_audio_speed(audio_path, speed)
318
 
319
  return f"Detected Emotion: {emotion} (Confidence: {confidence:.2f})", audio_path
320
  else:
 
322
  except Exception as e:
323
  return f"Error: {str(e)}", None
324
 
325
+
326
 
327
  # Define Gradio interface
328
  iface = gr.Interface(