Anita-19 committed
Commit c91a77d · verified · 1 Parent(s): afe9f6d

Update app.py

Files changed (1): app.py (+11, -55)
app.py CHANGED

@@ -20,33 +20,10 @@ def adjust_speed(audio_path, speed_factor):
     sf.write(audio_path, y_speeded, sr)
 
 
-"""Set up the Gradio interface"""
-
+# Set up the Gradio interface
 import gradio as gr
-from transformers import pipeline
-from TTS.api import TTS
-
-# Load pre-trained emotion detection model
-emotion_classifier = pipeline("text-classification", model="bhadresh-savani/distilbert-base-uncased-emotion")
-
-# Load TTS model
-tts_model = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC")
 
-# Emotion-specific settings for pitch and speed
-emotion_settings = {
-    "neutral": {"pitch": 1.0, "speed": 1.0},
-    "joy": {"pitch": 1.3, "speed": 1.2},
-    "sadness": {"pitch": 0.8, "speed": 0.9},
-    "anger": {"pitch": 1.6, "speed": 1.4},
-    "fear": {"pitch": 1.2, "speed": 0.95},
-    "surprise": {"pitch": 1.5, "speed": 1.3},
-    "disgust": {"pitch": 0.9, "speed": 0.95},
-    "shame": {"pitch": 0.8, "speed": 0.85},
-}
-
-
-# Function to process text or file input and generate audio
-def emotion_aware_tts_pipeline(input_text=None, file_input=None):
+def emotion_aware_tts_pipeline_gradio(input_text=None, file_input=None):
     try:
         # Get text from input or file
         if file_input:
@@ -59,19 +36,18 @@ def emotion_aware_tts_pipeline(input_text=None, file_input=None):
         emotion = emotion_data['label']
         confidence = emotion_data['score']
 
-        # Adjust text for speed
+        # Adjust pitch and speed
         settings = emotion_settings.get(emotion.lower(), {"pitch": 1.0, "speed": 1.0})
-        speed = settings["speed"]
         pitch = settings["pitch"]
-
+        speed = settings["speed"]
 
         # Generate audio
         audio_path = "output.wav"
-        tts_model.tts_to_file(text=input_text, file_path=audio_path)
+        mel_spectrogram = tts_model.get_mel_spectrogram(input_text)
+        audio = vocoder.decode(mel_spectrogram)
 
-        # Adjust pitch
-        pitch_factor = (pitch - 1.0) * 12  # Convert to semitones for librosa
-        adjust_pitch(audio_path, pitch_factor)
+        # Post-processing: adjust pitch and speed
+        adjust_pitch_and_speed(audio_path, pitch_factor=pitch, speed_factor=speed)
 
         return f"Detected Emotion: {emotion} (Confidence: {confidence:.2f})", audio_path
         else:
@@ -79,12 +55,9 @@ def emotion_aware_tts_pipeline(input_text=None, file_input=None):
     except Exception as e:
         return f"Error: {str(e)}", None
 
-
-
-
 # Define Gradio interface
-interface = gr.Interface(
-    fn=emotion_aware_tts_pipeline,
+iface = gr.Interface(
+    fn=emotion_aware_tts_pipeline_gradio,
     inputs=[
         gr.Textbox(label="Input Text", placeholder="Enter text here"),
         gr.File(label="Upload a Text File")
@@ -98,21 +71,4 @@ interface = gr.Interface(
 )
 
 # Launch Gradio interface
-interface.launch()# Define Gradio interface
-interface = gr.Interface(
-    fn=emotion_aware_tts_pipeline,
-    inputs=[
-        gr.Textbox(label="Input Text", placeholder="Enter text here"),
-        gr.File(label="Upload a Text File")
-    ],
-    outputs=[
-        gr.Textbox(label="Detected Emotion"),
-        gr.Audio(label="Generated Audio")
-    ],
-    title="Emotion-Aware Text-to-Speech",
-    description="Input text or upload a text file to detect the emotion and generate audio with emotion-aware modulation."
-)
-
-# Export the interface object so Hugging Face can launch it
-if __name__ == "__main__":
-    interface.launch()
+iface.launch()
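
The updated pipeline calls adjust_pitch_and_speed(), a helper that does not appear in this diff. A minimal sketch of what such a helper could look like, assuming the same librosa + soundfile approach as the removed adjust_pitch/adjust_speed code (including the old (pitch - 1.0) * 12 semitone conversion); the function name and keyword arguments come from the call site above, the body is an assumption:

import librosa
import soundfile as sf

def adjust_pitch_and_speed(audio_path, pitch_factor=1.0, speed_factor=1.0):
    # Hypothetical helper, not part of this commit: applies pitch and
    # speed modulation in one pass and writes the result back in place.
    y, sr = librosa.load(audio_path, sr=None)
    # Convert the multiplicative pitch factor to semitones, mirroring
    # the removed code's (pitch - 1.0) * 12 conversion
    n_steps = (pitch_factor - 1.0) * 12
    y = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)
    # rate > 1.0 speeds the audio up, rate < 1.0 slows it down
    y = librosa.effects.time_stretch(y, rate=speed_factor)
    sf.write(audio_path, y, sr)

Writing the result back to audio_path keeps the call site unchanged, since the Gradio handler returns the same path it passed in.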