poemsforaphrodite committed on
Commit
e37c63c
·
verified ·
1 Parent(s): b1b8b0f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -15
app.py CHANGED
@@ -1,23 +1,61 @@
1
- import spaces
2
- import gradio as gr
3
  import torch
4
  from TTS.api import TTS
5
  import os
 
 
6
  os.environ["COQUI_TOS_AGREED"] = "1"
7
 
8
- device = "cuda"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
 
 
 
 
 
 
 
 
 
11
 
12
- def clone(text, audio):
13
- tts.tts_to_file(text=text, speaker_wav=audio, language="en", file_path="./output.wav")
14
- return "./output.wav"
 
 
 
 
 
15
 
16
- iface = gr.Interface(fn=clone,
17
- inputs=[gr.Textbox(label='Text'),gr.Audio(type='filepath', label='Voice reference audio file')],
18
- outputs=gr.Audio(type='filepath'),
19
- title='Voice Clone',
20
- description="""
21
- """,
22
- theme = gr.themes.Base(primary_hue="teal",secondary_hue="teal",neutral_hue="slate"))
23
- iface.launch()
 
1
+ import streamlit as st
 
2
  import torch
3
  from TTS.api import TTS
4
  import os
5
+ import tempfile
6
+
7
# Set before any model is loaded; presumably this pre-accepts the Coqui
# terms-of-service prompt so the model download does not block on input.
os.environ["COQUI_TOS_AGREED"] = "1"

# Use the GPU when torch can see one, otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"


@st.cache_resource
def load_tts_model():
    """Build the XTTS v2 model and move it onto ``device``.

    The function is wrapped in ``st.cache_resource`` so the result is cached
    by Streamlit rather than rebuilt on every call.

    Returns:
        The TTS model object, placed on the module-level ``device``.
    """
    model = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
    return model.to(device)


# Shared model instance used by the synthesis code below.
tts = load_tts_model()
17
+
18
def clone(text, audio_file, language, speaking_rate, pitch, volume,
          emotion, sample_rate, temperature, seed):
    """Synthesize ``text`` in the voice of a reference recording.

    Args:
        text: The text to speak.
        audio_file: Reference voice sample. Either a filesystem path or a
            binary file-like object (for example the object returned by
            ``st.file_uploader``). File-like input is copied to a temporary
            file because ``speaker_wav`` is handed to the model as a path.
        language: Language code forwarded to the model.
        speaking_rate, pitch, volume, emotion, sample_rate, temperature:
            Accepted so the caller's signature keeps working, but NOT
            forwarded to the model by this function; they currently have no
            effect on the generated audio.
        seed: Optional number. When not ``None`` it seeds torch's global
            random generator before synthesis.

    Returns:
        Path of a newly created temporary ``.wav`` file holding the generated
        speech. The file is not deleted here; the caller owns it.

    Raises:
        Whatever ``tts.tts_to_file`` raises. In that case the partially
        created output file is removed before the exception propagates.
    """
    if seed is not None:
        # Number widgets may deliver a float; a seed is an integer quantity.
        torch.manual_seed(int(seed))

    reference_path, reference_is_copy = _as_speaker_path(audio_file)
    try:
        # Reserve a unique output name; the handle is closed immediately so
        # the model can open the path itself.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
            temp_audio_path = temp_audio.name

        try:
            tts.tts_to_file(
                text=text,
                speaker_wav=reference_path,
                language=language,
                file_path=temp_audio_path,
            )
        except Exception:
            # Do not leave an empty/partial output file behind on failure.
            _remove_quietly(temp_audio_path)
            raise
    finally:
        # Only delete the reference when it is our own temporary copy, never
        # a path the caller passed in.
        if reference_is_copy:
            _remove_quietly(reference_path)

    return temp_audio_path


def _as_speaker_path(audio_file):
    """Return ``(path, is_temporary_copy)`` for a path or file-like input."""
    if isinstance(audio_file, (str, os.PathLike)):
        return os.fspath(audio_file), False

    # Keep the original extension (e.g. ".mp3") so the audio loader can
    # recognise the format; fall back to ".wav" when no name is available.
    original_name = str(getattr(audio_file, "name", "") or "")
    suffix = os.path.splitext(original_name)[1] or ".wav"

    # getvalue() returns the whole buffer regardless of the current read
    # position; plain file objects only offer read().
    if hasattr(audio_file, "getvalue"):
        payload = audio_file.getvalue()
    else:
        payload = audio_file.read()

    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as reference:
        reference.write(payload)
        return reference.name, True


def _remove_quietly(path):
    """Delete ``path``; cleanup is best-effort, so a failed delete is ignored."""
    try:
        os.remove(path)
    except OSError:
        pass
34
+
35
# ---------------------------------------------------------------------------
# Page layout: input widgets, then the "Generate" action.
# ---------------------------------------------------------------------------
st.title('Advanced Voice Clone')
st.write('Customize your voice cloning experience with various parameters.')

text = st.text_area('Text')
audio_file = st.file_uploader('Voice reference audio file', type=['wav', 'mp3'])
language = st.selectbox('Language', ["en", "es", "fr", "de", "it"], index=0)
speaking_rate = st.slider('Speaking Rate', 0.5, 2.0, 1.0)
pitch = st.slider('Pitch Adjustment', -10, 10, 0)
volume = st.slider('Volume', 0.1, 2.0, 1.0)
emotion = st.selectbox('Emotion', ["neutral", "happy", "sad", "angry"], index=0)
sample_rate = st.selectbox('Sample Rate', [22050, 24000, 44100, 48000], index=1)
temperature = st.slider('Temperature', 0.1, 1.0, 0.8)
seed = st.number_input('Seed (optional)', value=None)

if st.button('Generate'):
    if text and audio_file:
        with st.spinner('Generating audio...'):
            output_path = clone(text, audio_file, language, speaking_rate, pitch, volume,
                                emotion, sample_rate, temperature, seed)

        # Load the result into memory and delete exactly the file that was
        # generated. The previous cleanup scanned the current working
        # directory for "tmp*.wav", but the temporary output is created in
        # the system temp directory, so that scan never removed it and could
        # delete unrelated files that happened to match the pattern.
        try:
            with open(output_path, 'rb') as generated:
                audio_bytes = generated.read()
        finally:
            try:
                os.remove(output_path)
            except OSError:
                # Best-effort cleanup; a failed delete must not hide the audio.
                pass

        st.audio(audio_bytes, format='audio/wav')
    else:
        st.warning('Please provide both text and a voice reference audio file.')