Anita-19 committed
Commit cac8321 · verified · 1 Parent(s): 331d92d

Update main.py

Files changed (1): main.py +20 -4
main.py CHANGED
@@ -296,7 +296,18 @@ emotion_settings = {
     "neutral": {"pitch": 1.0, "speed": 1.0},
 }
 
-# Function to process text or file input and generate audio
+import soundfile as sf
+
+def adjust_audio_speed(audio_path, speed_factor):
+    y, sr = librosa.load(audio_path)
+    y_speeded = librosa.effects.time_stretch(y, speed_factor)
+    sf.write(audio_path, y_speeded, sr)
+
+def adjust_audio_pitch(audio_path, pitch_factor):
+    y, sr = librosa.load(audio_path)
+    y_shifted = librosa.effects.pitch_shift(y, sr, n_steps=pitch_factor)
+    sf.write(audio_path, y_shifted, sr)
+
 def emotion_aware_tts_pipeline(input_text=None, file_input=None):
     try:
         # Get text from input or file
@@ -304,7 +315,7 @@ def emotion_aware_tts_pipeline(input_text=None, file_input=None):
             with open(file_input.name, 'r') as file:
                 input_text = file.read()
 
-        if input_text:
+        if input_text:
             # Detect emotion
             emotion_data = emotion_classifier(input_text)[0]
             emotion = emotion_data['label']
@@ -317,16 +328,21 @@ def emotion_aware_tts_pipeline(input_text=None, file_input=None):
 
             # Generate audio
             audio_path = "output.wav"
-            tts_model.tts_to_file(text=input_text, file_path=audio_path, speed=speed, pitch=pitch)
+            tts_model.tts_to_file(text=input_text, file_path=audio_path)
 
+            # Adjust pitch and speed using librosa
+            if pitch != 1.0:
+                adjust_audio_pitch(audio_path, pitch)
+            if speed != 1.0:
+                adjust_audio_speed(audio_path, speed)
 
             return f"Detected Emotion: {emotion} (Confidence: {confidence:.2f})", audio_path
         else:
             return "Please provide input text or file", None
     except Exception as e:
-        # Return error message if something goes wrong
        return f"Error: {str(e)}", None
 
+
 # Define Gradio interface
 iface = gr.Interface(
     fn=emotion_aware_tts_pipeline,
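
Note on the new librosa post-processing: as committed, the helpers pass the rate to time_stretch and sr to pitch_shift positionally, which librosa 0.10+ rejects (those parameters became keyword-only), and they feed the multiplicative pitch factor from emotion_settings straight into n_steps, which expects semitones. A minimal sketch of the same two helpers under those assumptions; the librosa >= 0.10 keyword calls, the factor-to-semitone conversion, and sr=None are additions for illustration, not part of the commit:

import math

import librosa
import soundfile as sf

def adjust_audio_speed(audio_path, speed_factor):
    # rate > 1.0 speeds the clip up, rate < 1.0 slows it down
    y, sr = librosa.load(audio_path, sr=None)  # sr=None keeps the TTS output's native rate
    y_speeded = librosa.effects.time_stretch(y, rate=speed_factor)
    sf.write(audio_path, y_speeded, sr)

def adjust_audio_pitch(audio_path, pitch_factor):
    # Assumption: pitch_factor is a ratio (e.g. 1.2); convert it to semitones for n_steps
    n_steps = 12 * math.log2(pitch_factor)
    y, sr = librosa.load(audio_path, sr=None)
    y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)
    sf.write(audio_path, y_shifted, sr)

Overwriting output.wav in place keeps the rest of the pipeline unchanged; both helpers assume pitch and speed come from the matched emotion_settings entry, as in the diff.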