"""Set up the Gradio interface""" import gradio as gr from transformers import pipeline from TTS.api import TTS # Load pre-trained emotion detection model emotion_classifier = pipeline("text-classification", model="bhadresh-savani/distilbert-base-uncased-emotion") # Load TTS model tts_model = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC") # Emotion-specific settings for pitch and speed emotion_settings = { "neutral": {"pitch": 1.0, "speed": 1.0}, "joy": {"pitch": 1.3, "speed": 1.2}, "sadness": {"pitch": 0.8, "speed": 0.9}, "anger": {"pitch": 1.6, "speed": 1.4}, "fear": {"pitch": 1.2, "speed": 0.95}, "surprise": {"pitch": 1.5, "speed": 1.3}, "disgust": {"pitch": 0.9, "speed": 0.95}, "shame": {"pitch": 0.8, "speed": 0.85}, } import librosa import soundfile as sf def adjust_audio_speed(audio_path, speed_factor): y, sr = librosa.load(audio_path) y_speeded = librosa.effects.time_stretch(y, speed_factor) sf.write(audio_path, y_speeded, sr) def adjust_audio_pitch(audio_path, pitch_factor): y, sr = librosa.load(audio_path) y_shifted = librosa.effects.pitch_shift(y, sr, n_steps=pitch_factor) sf.write(audio_path, y_shifted, sr) def emotion_aware_tts_pipeline(input_text=None, file_input=None): try: # Get text from input or file if file_input: with open(file_input.name, 'r') as file: input_text = file.read() if input_text: # Detect emotion emotion_data = emotion_classifier(input_text)[0] emotion = emotion_data['label'] confidence = emotion_data['score'] # Adjust pitch and speed settings = emotion_settings.get(emotion.lower(), {"pitch": 1.0, "speed": 1.0}) pitch = settings["pitch"] speed = settings["speed"] # Generate audio audio_path = "output.wav" tts_model.tts_to_file(text=input_text, file_path=audio_path) # Adjust pitch and speed using librosa if pitch != 1.0: adjust_audio_pitch(audio_path, pitch) if speed != 1.0: adjust_audio_speed(audio_path, speed) return f"Detected Emotion: {emotion} (Confidence: {confidence:.2f})", audio_path else: return "Please provide input text or file", None except Exception as e: return f"Error: {str(e)}", None # Define Gradio interface iface = gr.Interface( fn=emotion_aware_tts_pipeline, inputs=[ gr.Textbox(label="Input Text", placeholder="Enter text here"), gr.File(label="Upload a Text File") ], outputs=[ gr.Textbox(label="Detected Emotion"), gr.Audio(label="Generated Audio") ], title="Emotion-Aware Text-to-Speech", description="Input text or upload a text file to detect the emotion and generate audio with emotion-aware modulation." ) # Launch Gradio interface iface.launch()