|
import os
import tempfile
from io import BytesIO

import librosa
import numpy as np
import scipy.signal as signal
import soundfile as sf
import streamlit as st
|
|
|
def pitch_shift_with_formant_preservation(y, sr, n_steps):
    """Shift the pitch of ``y`` by ``n_steps`` steps (12 steps per octave).

    Despite the name, no separate formant-preservation stage is performed
    here: the signal is handed straight to ``librosa.effects.pitch_shift``.

    Args:
        y: Audio time series to shift.
        sr: Sampling rate of ``y`` in Hz.
        n_steps: Number of steps to shift by. With ``bins_per_octave=12``
            one step is one semitone; fractional values are accepted.

    Returns:
        The pitch-shifted signal. When ``n_steps`` is zero the input is
        returned unchanged (the same object), since there is nothing to
        shift.
    """
    if n_steps == 0:
        # A zero shift is the identity; skip the shifter entirely rather
        # than run the audio through it for no intended change.
        return y

    # NOTE(review): 'kaiser_fast' resampling may require the optional
    # `resampy` package on newer librosa releases -- confirm against the
    # librosa version this app is deployed with.
    return librosa.effects.pitch_shift(
        y=y,
        sr=sr,
        n_steps=n_steps,
        bins_per_octave=12,
        res_type='kaiser_fast',
    )
|
|
|
def enhance_female_characteristics(y, sr, settings):
    """Blend the harmonic component back into ``y`` and apply the EQ stage.

    The signal is split with ``librosa.effects.hpss`` (margin 3.0, kernel
    size 31); only the harmonic part is used. It is cross-faded with the
    unprocessed signal using ``settings['harmonic_boost']`` as the weight of
    the harmonic part, and the blend is passed through ``apply_female_eq``.

    Args:
        y: Audio time series.
        sr: Sampling rate of ``y`` in Hz.
        settings: Mapping that must contain the key ``'harmonic_boost'``.

    Returns:
        The result of ``apply_female_eq`` on the blended signal.
    """
    # The percussive component is computed by hpss but intentionally unused.
    harmonic_part, _percussive_part = librosa.effects.hpss(
        y, margin=3.0, kernel_size=31
    )

    boost = settings['harmonic_boost']
    dry_weight = 1 - boost
    blended = harmonic_part * boost + y * dry_weight

    return apply_female_eq(blended, sr)
|
|
|
def apply_female_eq(y, sr):
    """Combine two band-passed copies of ``y`` and peak-normalize the sum.

    Two 2nd-order Butterworth band-pass filters are designed, one for
    1000-2000 Hz and one for 3000-5000 Hz, and each is applied to ``y`` with
    zero-phase filtering (``filtfilt``). The output is the first band plus
    0.3 times the second band, passed through ``librosa.util.normalize``.

    NOTE(review): the result is built only from the two band-passed signals;
    the unfiltered input is not added back in. Confirm that this is the
    intended "EQ" behaviour rather than a boost on top of the input.

    Args:
        y: Audio time series.
        sr: Sampling rate of ``y`` in Hz; the band edges are expressed
            relative to ``sr / 2``.

    Returns:
        The normalized mix of the two bands.
    """
    nyquist = sr / 2

    def band_component(low_hz, high_hz):
        # Design the band-pass for this range and run it forward-backward.
        numerator, denominator = signal.butter(
            2, [low_hz / nyquist, high_hz / nyquist], btype='band'
        )
        return signal.filtfilt(numerator, denominator, y)

    lower_band = band_component(1000, 2000)
    upper_band = band_component(3000, 5000)

    combined = lower_band + 0.3 * upper_band
    return librosa.util.normalize(combined)
|
|
|
def add_breathiness(y, sr, amount):
    """Mix low-passed random noise into ``y`` and peak-normalize the result.

    Gaussian noise (mean 0, standard deviation 0.005) of the same length as
    ``y`` is low-pass filtered with a 2nd-order Butterworth filter at 2000 Hz
    using zero-phase filtering, then cross-faded with the input.

    Args:
        y: Audio time series.
        sr: Sampling rate of ``y`` in Hz.
        amount: Cross-fade weight; the input is scaled by ``1 - amount`` and
            the filtered noise by ``amount``.

    Returns:
        The mixed signal after ``librosa.util.normalize``. The output is not
        deterministic because the noise is drawn from ``np.random``.
    """
    white_noise = np.random.normal(loc=0, scale=0.005, size=len(y))

    cutoff = 2000 / (sr / 2)
    lowpass_coeffs = signal.butter(2, cutoff, btype='lowpass')
    shaped_noise = signal.filtfilt(*lowpass_coeffs, white_noise)

    dry_gain = 1 - amount
    mixed = y * dry_gain + shaped_noise * amount
    return librosa.util.normalize(mixed)
|
|
|
def process_audio_advanced(audio_file, settings):
    """Run the full voice-conversion chain on an audio file.

    Steps, in order: load at 24 kHz, remove the DC offset and normalize,
    pitch-shift, harmonic enhancement plus EQ, optional breathiness, a light
    smoothing pass, and a final peak normalization.

    Args:
        audio_file: Path (or other source accepted by ``librosa.load``) of
            the audio to convert.
        settings: Mapping with the keys ``'pitch_shift'``,
            ``'harmonic_boost'`` and ``'breathiness'``.

    Returns:
        A ``(samples, sample_rate)`` tuple with the processed signal and the
        rate it was loaded at.

    Raises:
        ValueError: If the loaded audio contains no samples.
        KeyError: If a required key is missing from ``settings``.
    """
    y, sr = librosa.load(audio_file, sr=24000)
    if y.size == 0:
        raise ValueError("Audio file contains no samples")

    # Centre the waveform around zero before normalizing its peak.
    y = librosa.util.normalize(y - np.mean(y))

    y_shifted = pitch_shift_with_formant_preservation(
        y, sr, settings['pitch_shift']
    )
    y_enhanced = enhance_female_characteristics(y_shifted, sr, settings)

    breathiness = settings['breathiness']
    if breathiness > 0:
        y_enhanced = add_breathiness(y_enhanced, sr, breathiness)

    # Keep the smoothing window short. A Savitzky-Golay filter acts as a
    # low-pass whose cutoff falls as the window grows; a window of ~1000
    # samples at this rate would remove essentially the whole speech band.
    smoothing_window = 5
    smoothing_polyorder = 2
    # savgol_filter rejects inputs shorter than its window, so very short
    # clips skip the smoothing step instead of failing.
    if len(y_enhanced) >= smoothing_window:
        y_enhanced = signal.savgol_filter(
            y_enhanced, smoothing_window, smoothing_polyorder
        )

    # Normalize last so the returned signal is peak-normalized even after
    # smoothing has changed the peak level.
    y_final = librosa.util.normalize(y_enhanced)

    return y_final, sr
|
|
|
def create_voice_preset(preset_name):
    """Return the settings dictionary for a named voice preset.

    Args:
        preset_name: One of ``'Young Female'``, ``'Mature Female'`` or
            ``'Soft Female'``.

    Returns:
        A new dict with the keys ``'pitch_shift'``, ``'harmonic_boost'`` and
        ``'breathiness'``, or ``None`` when ``preset_name`` is not a known
        preset.
    """
    setting_keys = ('pitch_shift', 'harmonic_boost', 'breathiness')

    # Values are listed in the same order as ``setting_keys``.
    preset_values = {
        'Young Female': (4.0, 0.3, 0.15),
        'Mature Female': (3.0, 0.2, 0.1),
        'Soft Female': (3.5, 0.25, 0.2),
    }

    chosen = preset_values.get(preset_name)
    if chosen is None:
        return None
    return dict(zip(setting_keys, chosen))
|
|
|
# Streamlit page: upload a clip, choose a preset (or custom settings), convert
# it, then offer the result for playback and download.
st.title("Improved Female Voice Converter")

uploaded_file = st.file_uploader("Upload an audio file", type=['wav', 'mp3'])

if uploaded_file is not None:
    preset_name = st.selectbox(
        "Select Voice Preset",
        ['Young Female', 'Mature Female', 'Soft Female', 'Custom'],
    )

    if preset_name == 'Custom':
        # Custom mode exposes each setting as its own slider.
        settings = {
            'pitch_shift': st.slider("Pitch Shift", 0.0, 6.0, 4.0, 0.5),
            'harmonic_boost': st.slider("Harmonic Enhancement", 0.0, 0.5, 0.3, 0.05),
            'breathiness': st.slider("Breathiness", 0.0, 0.3, 0.15, 0.05),
        }
    else:
        settings = create_voice_preset(preset_name)

    if st.button("Convert Voice"):
        with st.spinner("Processing audio..."):
            # Keep the upload's real extension so an mp3 is not written to a
            # file that claims to be a wav.
            suffix = os.path.splitext(uploaded_file.name)[1].lower() or '.wav'
            tmp_path = None
            try:
                # The temp file is created only when a conversion is actually
                # requested, not on every script rerun. delete=False lets the
                # loader reopen it by path after this handle is closed.
                with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
                    tmp_file.write(uploaded_file.getvalue())
                    tmp_path = tmp_file.name

                processed_audio, sr = process_audio_advanced(tmp_path, settings)

                buffer = BytesIO()
                sf.write(buffer, processed_audio, sr, format='WAV')
                # Hand out the raw bytes so neither widget depends on the
                # buffer's current read position.
                wav_bytes = buffer.getvalue()

                st.audio(wav_bytes, format='audio/wav')

                st.download_button(
                    label="Download Converted Audio",
                    data=wav_bytes,
                    file_name="female_voice_converted.wav",
                    mime="audio/wav",
                )
            except Exception as e:
                # Top-level UI boundary: report any failure to the user
                # instead of crashing the page.
                st.error(f"Error processing audio: {e}")
            finally:
                # Always remove the temp file; cleanup is best-effort.
                if tmp_path is not None:
                    try:
                        os.remove(tmp_path)
                    except OSError:
                        pass

st.markdown("""
### Tips for Best Results:
1. Use high-quality input audio with clear speech
2. Start with presets and adjust if needed
3. Keep pitch shift between 3-5 for most natural results
4. Use minimal breathiness (0.1-0.2) for realistic sound
5. Record in a quiet environment with minimal background noise

### Recommended Settings:
- For younger female voice: pitch shift 4.0, harmonic boost 0.3
- For mature female voice: pitch shift 3.0, harmonic boost 0.2
- For soft female voice: pitch shift 3.5, harmonic boost 0.25
""")