|
import streamlit as st |
|
import librosa |
|
import soundfile as sf |
|
import numpy as np |
|
import scipy.signal as signal |
|
from scipy.io import wavfile |
|
from io import BytesIO |
|
import tempfile |
|
|
|
def _warp_lpc_poles(a, factor):
    """Return LPC coefficients whose pole angles are scaled by ``factor``."""
    poles = np.roots(a)
    angles = np.angle(poles)
    # Only complex (conjugate-paired) poles describe resonances. Real poles
    # are left alone so they cannot turn into an unpaired complex pole,
    # which would make the rebuilt polynomial complex-valued.
    paired = poles.imag != 0
    # Clipping keeps a warped pole from wrapping past +/- pi (Nyquist).
    angles[paired] = np.clip(angles[paired] * factor, -np.pi, np.pi)
    # Radii are untouched, so the warped filter is stable whenever the
    # fitted one is.
    warped = np.abs(poles) * np.exp(1j * angles)
    # Any imaginary part left in the polynomial is rounding noise.
    return a[0] * np.real(np.poly(warped))


def _resynthesise_frame(frame, order, factor):
    """Re-synthesise one frame through its formant-warped LPC filter."""
    if not np.any(frame):
        # A silent frame has no envelope to model; pass it through.
        return frame
    a = librosa.lpc(frame, order=order)
    if not np.all(np.isfinite(a)):
        # NOTE(review): some librosa versions may yield non-finite
        # coefficients on ill-conditioned frames instead of raising;
        # np.roots cannot handle those, so pass the frame through.
        return frame
    # Inverse-filter to the excitation, then drive the warped envelope.
    residual = signal.lfilter(a, [1.0], frame)
    return signal.lfilter([1.0], _warp_lpc_poles(a, factor), residual)


def modify_formants(y, sr, formant_shift_factor=1.2):
    """Shift the formants of a signal by warping its LPC envelope.

    The signal is cut into overlapping frames (2048 samples, hop 512). For
    each frame an order-12 LPC model is fitted, the frame is inverse-
    filtered to obtain its residual, the angles of the model's complex
    poles are multiplied by ``formant_shift_factor``, and the residual is
    filtered through the warped all-pole model. Frames are recombined by
    Hann-windowed overlap-add.

    The previous implementation multiplied coefficient ``i`` by
    ``factor ** i``. That scales every pole *radius* by ``factor``, which
    for factors above 1 generally pushes poles outside the unit circle and
    makes the output diverge.

    Args:
        y: 1-D array-like of audio samples.
        sr: Sample rate. Unused (the warp is relative); kept so existing
            callers keep working.
        formant_shift_factor: Multiplier applied to pole angles; values
            above 1 move resonances up in frequency.

    Returns:
        The processed signal after ``librosa.util.normalize``. An empty
        input is returned as an empty array. Inputs shorter than one frame
        are zero-padded for processing and trimmed back to their original
        length. For longer inputs, trailing samples that do not complete a
        hop are dropped, as before.
    """
    frame_length = 2048
    hop_length = 512
    lpc_order = 12

    y = np.asarray(y)
    n_samples = y.size
    if n_samples == 0:
        return y

    padded = y
    if n_samples < frame_length:
        # librosa.util.frame rejects inputs shorter than one frame.
        padded = np.pad(y, (0, frame_length - n_samples))

    frames = librosa.util.frame(
        padded, frame_length=frame_length, hop_length=hop_length
    )
    n_frames = frames.shape[-1]
    out_len = (n_frames - 1) * hop_length + frame_length

    window = signal.get_window("hann", frame_length)
    mixed = np.zeros(out_len)
    weight = np.zeros(out_len)
    for index, frame in enumerate(frames.T):
        start = index * hop_length
        stop = start + frame_length
        mixed[start:stop] += window * _resynthesise_frame(
            frame, lpc_order, formant_shift_factor
        )
        weight[start:stop] += window
    # Dividing by the summed window undoes the overlap gain; the floor only
    # matters where the window itself is zero.
    mixed /= np.maximum(weight, np.finfo(float).eps)

    return librosa.util.normalize(mixed[:n_samples])
|
|
|
def enhance_harmonics(y, sr):
    """Emphasise the harmonic component of a signal.

    Args:
        y: Audio samples, passed to ``librosa.effects.hpss``.
        sr: Sample rate; not used by this function.

    Returns:
        ``librosa.util.normalize`` applied to a blend of 1.2 x the harmonic
        component and 0.3 x the original signal.
    """
    # hpss returns a (harmonic, percussive) pair; only the first is used.
    harmonic, _percussive = librosa.effects.hpss(y)
    blended = 0.3 * y + 1.2 * harmonic
    return librosa.util.normalize(blended)
|
|
|
def process_audio_advanced(audio_file, settings):
    """Run the voice-conversion chain on an audio file.

    Stages, in order: load, pitch shift, formant modification, harmonic
    enhancement, time stretch, light Savitzky-Golay smoothing, and a final
    ``librosa.util.normalize``.

    Args:
        audio_file: Input accepted by ``librosa.load`` (called with its
            default arguments).
        settings: Dict with the keys ``'pitch_shift'`` (passed as
            ``n_steps`` to ``librosa.effects.pitch_shift``),
            ``'formant_shift'`` (passed to ``modify_formants``) and
            ``'vtln_factor'`` (passed as ``rate`` to
            ``librosa.effects.time_stretch``). An optional
            ``'smoothing_window'`` (odd number of samples, default 5) sets
            the smoothing window length.

    Returns:
        Tuple ``(samples, sr)`` of the processed signal and the sample
        rate reported by ``librosa.load``.

    Raises:
        KeyError: If a required key is missing from ``settings``.
    """
    smoothing_polyorder = 2
    default_smoothing_window = 5

    y, sr = librosa.load(audio_file)

    y_shifted = librosa.effects.pitch_shift(
        y,
        sr=sr,
        n_steps=settings['pitch_shift'],
    )

    y_formant = modify_formants(y_shifted, sr, settings['formant_shift'])

    y_harmonic = enhance_harmonics(y_formant, sr)

    # NOTE(review): time_stretch changes duration/tempo, not vocal-tract
    # length, despite the setting's name -- confirm this is intended.
    y_vtln = librosa.effects.time_stretch(
        y_harmonic,
        rate=settings['vtln_factor'],
    )

    # The former 1001-sample window acted as a low-pass with a cutoff of a
    # few tens of Hz, wiping out the voice band, and raised on signals
    # shorter than the window. A short window only softens sample-level
    # roughness.
    window = int(settings.get('smoothing_window', default_smoothing_window))
    window = min(window, len(y_vtln))
    if window % 2 == 0:
        window -= 1  # keep the window length odd
    if window > smoothing_polyorder:
        y_smooth = signal.savgol_filter(y_vtln, window, smoothing_polyorder)
    else:
        # Too few samples to fit the polynomial; skip smoothing.
        y_smooth = y_vtln

    y_final = librosa.util.normalize(y_smooth)

    return y_final, sr
|
|
|
def create_voice_preset(preset_name):
    """Look up the settings for a named voice preset.

    Args:
        preset_name: One of ``'Young Female'``, ``'Mature Female'`` or
            ``'Soft Female'``.

    Returns:
        A new dict with the keys ``'pitch_shift'``, ``'formant_shift'``,
        ``'vtln_factor'`` and ``'breathiness'``, or ``None`` when the name
        is not a known preset.
    """
    field_names = ('pitch_shift', 'formant_shift', 'vtln_factor', 'breathiness')
    preset_values = {
        'Young Female': (8.0, 1.3, 1.1, 0.3),
        'Mature Female': (6.0, 1.2, 1.05, 0.2),
        'Soft Female': (7.0, 1.25, 1.15, 0.4),
    }

    values = preset_values.get(preset_name)
    if values is None:
        return None
    return dict(zip(field_names, values))
|
|
|
def add_breathiness(y, sr, amount=0.3):
    """Mix filtered Gaussian noise into a signal.

    Args:
        y: Audio samples; one noise sample is drawn per element.
        sr: Sample rate; not used by this function.
        amount: Weight of the noise in the mix. The signal is weighted by
            ``1 - amount``.

    Returns:
        ``librosa.util.normalize`` applied to the weighted mix.
    """
    white = np.random.normal(loc=0, scale=0.01, size=len(y))
    # One-pole recursive filter: out[n] = in[n] + 0.98 * out[n - 1].
    # NOTE(review): this emphasises low frequencies -- confirm that is the
    # intended colour for breath noise.
    coloured = signal.lfilter([1], [1, -0.98], white)

    dry_gain = 1 - amount
    mixed = dry_gain * y + amount * coloured
    return librosa.util.normalize(mixed)
|
|
|
# Streamlit front end: upload a file, pick or tune a preset, convert, then
# play back and offer the result for download.
st.title("Advanced Female Voice Converter")

uploaded_file = st.file_uploader("Upload an audio file", type=['wav', 'mp3'])

if uploaded_file is not None:
    preset_name = st.selectbox(
        "Select Voice Preset",
        ['Young Female', 'Mature Female', 'Soft Female', 'Custom'],
    )

    if preset_name == 'Custom':
        settings = {
            'pitch_shift': st.slider("Pitch Shift", 0.0, 12.0, 8.0, 0.5),
            'formant_shift': st.slider("Formant Shift", 1.0, 1.5, 1.2, 0.05),
            'vtln_factor': st.slider("Vocal Tract Length", 0.9, 1.2, 1.1, 0.05),
            'breathiness': st.slider("Breathiness", 0.0, 1.0, 0.3, 0.1),
        }
    else:
        settings = create_voice_preset(preset_name)

    if st.button("Convert Voice"):
        with st.spinner("Processing audio..."):
            try:
                # The upload is written to disk only when a conversion is
                # requested, inside a temporary directory that is removed
                # on exit. Previously a delete=False temp file was created
                # on every script rerun and never cleaned up.
                # NOTE(review): the suffix is '.wav' even for MP3 uploads,
                # as before -- confirm the decoder goes by content rather
                # than by extension.
                with tempfile.TemporaryDirectory() as tmp_dir:
                    with tempfile.NamedTemporaryFile(
                        dir=tmp_dir, suffix='.wav', delete=False
                    ) as tmp_file:
                        tmp_file.write(uploaded_file.getvalue())
                    # The file is closed here so it can be reopened by name.
                    processed_audio, sr = process_audio_advanced(
                        tmp_file.name, settings
                    )

                processed_audio = add_breathiness(
                    processed_audio,
                    sr,
                    settings['breathiness'],
                )

                buffer = BytesIO()
                sf.write(buffer, processed_audio, sr, format='WAV')
                # Hand the widgets plain bytes rather than a stream whose
                # position is at end-of-file after the write.
                wav_bytes = buffer.getvalue()

                st.audio(wav_bytes, format='audio/wav')

                st.download_button(
                    label="Download Converted Audio",
                    data=wav_bytes,
                    file_name="female_voice_converted.wav",
                    mime="audio/wav",
                )

            except Exception as e:
                # Top-level UI boundary: report the failure to the user.
                st.error(f"Error processing audio: {e}")

st.markdown("""
### Voice Conversion Features:
- Pitch shifting with formant preservation
- Harmonic enhancement
- Vocal tract length modification
- Natural breathiness addition
- Multiple voice presets
- Custom parameter controls

### Tips for Best Results:
1. Start with a clear audio recording
2. Try different presets to find the best match
3. For custom settings:
   - Pitch shift: 6-8 for natural female voice
   - Formant shift: 1.1-1.3 for feminine resonance
   - Vocal tract length: 1.05-1.15 for realistic results
   - Breathiness: 0.2-0.4 for natural sound
""")