Spaces:

Artificial-superintelligence
/

Testvoice

Running

App Files Files Community

Artificial-superintelligence committed on Oct 22, 2024

Commit

7fcd4dd

verified ·

1 Parent(s): d535b69

Create app.py

Browse files

Files changed (1) hide show

app.py +146 -0

app.py ADDED Viewed

	@@ -0,0 +1,146 @@

+import gradio as gr
+import numpy as np
+import librosa
+import soundfile as sf
+from TTS.api import TTS
+import torch
+import os
+import tempfile
+# Initialize TTS model
+tts = TTS("tts_models/multilingual/multi-dataset/your_tts", progress_bar=False).to("cuda" if torch.cuda.is_available() else "cpu")
+def load_audio(audio_path):
+    audio, sr = librosa.load(audio_path, sr=None)
+    return audio, sr
+def save_audio(audio, sr, path):
+    sf.write(path, audio, sr)
+def pitch_shift(audio, sr, n_steps):
+    return librosa.effects.pitch_shift(audio, sr=sr, n_steps=n_steps)
+def change_voice(audio_path, pitch_shift_amount, formant_shift_amount):
+    # Load the audio
+    audio, sr = load_audio(audio_path)
+    # Apply pitch shifting
+    pitched_audio = pitch_shift(audio, sr, pitch_shift_amount)
+    # Use TTS model for voice conversion
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
+        save_audio(pitched_audio, sr, temp_file.name)
+        converted_audio_path = tts.voice_conversion(
+            source_wav=temp_file.name,
+            target_wav="path/to/female_target_voice.wav",  # You need to provide a female target voice file
+            output_wav=None
+        )
+    # Load the converted audio
+    converted_audio, _ = load_audio(converted_audio_path)
+    # Apply formant shifting (simplified approach)
+    formant_shifted_audio = librosa.effects.pitch_shift(converted_audio, sr=sr, n_steps=formant_shift_amount)
+    # Clean up temporary files
+    os.unlink(temp_file.name)
+    os.unlink(converted_audio_path)
+    return (sr, formant_shifted_audio)
+def process_audio(audio_file, pitch_shift_amount, formant_shift_amount):
+    sr, audio = change_voice(audio_file.name, pitch_shift_amount, formant_shift_amount)
+    output_path = "output_voice.wav"
+    save_audio(audio, sr, output_path)
+    return output_path
+# Stylesheet handed to gr.Blocks(css=...) below. The .input-section and
+# .output-section rules target the two columns tagged with those elem_classes;
+# .description styles the tagline paragraph in the HTML header.
+custom_css = """
+.gradio-container {
+    background-color: #f0f4f8;
+}
+.container {
+    max-width: 900px;
+    margin: auto;
+    padding: 20px;
+    border-radius: 10px;
+    background-color: white;
+    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+}
+h1 {
+    color: #2c3e50;
+    text-align: center;
+    font-size: 2.5em;
+    margin-bottom: 20px;
+}
+.description {
+    text-align: center;
+    color: #34495e;
+    margin-bottom: 30px;
+}
+.input-section, .output-section {
+    background-color: #ecf0f1;
+    padding: 20px;
+    border-radius: 8px;
+    margin-bottom: 20px;
+}
+.input-section h3, .output-section h3 {
+    color: #2980b9;
+    margin-bottom: 15px;
+}
+"""
+# Gradio Interface with improved design
+with gr.Blocks(css=custom_css) as demo:
+    gr.HTML(
+        """
+        <div style="text-align: center; max-width: 800px; margin: 0 auto;">
+            <div style="display: inline-flex; align-items: center; gap: 0.8rem; font-size: 1.75rem;">
+                <svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" fill="currentColor" viewBox="0 0 16 16" style="vertical-align: middle;">
+                    <path d="M3.5 6.5A.5.5 0 0 1 4 7v1a4 4 0 0 0 8 0V7a.5.5 0 0 1 1 0v1a5 5 0 0 1-4.5 4.975V15h3a.5.5 0 0 1 0 1h-7a.5.5 0 0 1 0-1h3v-2.025A5 5 0 0 1 3 8V7a.5.5 0 0 1 .5-.5z"/>
+                    <path d="M10 8a2 2 0 1 1-4 0V3a2 2 0 1 1 4 0v5zM8 0a3 3 0 0 0-3 3v5a3 3 0 0 0 6 0V3a3 3 0 0 0-3-3z"/>
+                </svg>
+                <h1 style="font-weight: 900; margin-bottom: 7px;">
+                    AI Voice Changer
+                </h1>
+            </div>
+            <p class="description">Transform any voice into a realistic female voice using advanced AI technology</p>
+        </div>
+        """
+    )
+    with gr.Row():
+        with gr.Column(elem_classes="input-section"):
+            gr.Markdown("### Input")
+            audio_input = gr.Audio(type="filepath", label="Upload Voice")
+            pitch_shift = gr.Slider(-12, 12, step=0.5, label="Pitch Shift", value=0)
+            formant_shift = gr.Slider(-5, 5, step=0.1, label="Formant Shift", value=0)
+            submit_btn = gr.Button("Transform Voice", variant="primary")
+        with gr.Column(elem_classes="output-section"):
+            gr.Markdown("### Output")
+            audio_output = gr.Audio(label="Transformed Voice")
+    submit_btn.click(
+        fn=process_audio,
+        inputs=[audio_input, pitch_shift, formant_shift],
+        outputs=audio_output,
+    )
+    gr.Markdown(
+        """
+        ### How to use:
+        1. Upload an audio file containing the voice you want to transform.
+        2. Adjust the Pitch Shift and Formant Shift sliders to fine-tune the voice (optional).
+        3. Click the "Transform Voice" button to process the audio.
+        4. Listen to the transformed voice in the output section.
+        5. Download the transformed audio file if desired.
+        Note: This application uses AI to transform voices. The quality of the output may vary depending on the input audio quality and the chosen settings.
+        """
+    )
+if __name__ == "__main__":
+    demo.launch()