Spaces:
Running
on
Zero
Running
on
Zero
Reduce some steps
Browse files
- .gitignore +1 -0
- .python-version +1 -0
- app.py +57 -78
- requirements.txt +0 -1
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
.venv
|
.python-version
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
3.10
|
app.py
CHANGED
@@ -6,88 +6,67 @@ import os
|
|
6 |
from typing import Optional, Dict, Any, List, Tuple
|
7 |
|
8 |
# Available OuteTTS models based on the documentation
|
9 |
-
MODELS = {v.value:
|
10 |
|
11 |
-
def initialize_interface(model_name: str) ->
|
12 |
-
"""Initialize the OuteTTS interface with selected model
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
status = f"✅ Successfully initialized {model_name}"
|
27 |
-
info = f"Model: {model_name}\nStatus: Ready"
|
28 |
-
|
29 |
-
return status, info, interface
|
30 |
-
|
31 |
-
except Exception as e:
|
32 |
-
error_msg = f"❌ Error initializing model: {str(e)}"
|
33 |
-
return error_msg, f"Error: {str(e)}", None
|
34 |
|
35 |
-
def create_speaker_from_audio(audio_file, interface_state) -> Tuple[str,
|
36 |
"""Create a speaker profile from uploaded audio file."""
|
37 |
if interface_state is None:
|
38 |
-
|
39 |
|
40 |
if audio_file is None:
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
status = "✅ Speaker profile created successfully!"
|
51 |
-
|
52 |
-
return status, speaker_json, speaker
|
53 |
-
|
54 |
-
except Exception as e:
|
55 |
-
error_msg = f"❌ Error creating speaker: {str(e)}"
|
56 |
-
return error_msg, f"Error: {str(e)}", None
|
57 |
|
58 |
|
59 |
|
60 |
-
def generate_sample_audio(text: str, temperature: float, interface_state, speaker_state) ->
|
61 |
"""Generate sample audio using the current speaker."""
|
62 |
if interface_state is None:
|
63 |
-
|
64 |
|
65 |
if speaker_state is None:
|
66 |
-
|
67 |
|
68 |
if not text.strip():
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
temperature=temperature
|
79 |
-
)
|
80 |
)
|
81 |
)
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
except Exception as e:
|
89 |
-
error_msg = f"❌ Error generating audio: {str(e)}"
|
90 |
-
return error_msg, None
|
91 |
|
92 |
# Create the Gradio interface
|
93 |
with gr.Blocks(title="OuteTTS Speaker Creator", theme=gr.themes.Soft()) as demo:
|
@@ -100,19 +79,15 @@ with gr.Blocks(title="OuteTTS Speaker Creator", theme=gr.themes.Soft()) as demo:
|
|
100 |
|
101 |
with gr.Row():
|
102 |
with gr.Column(scale=1):
|
103 |
-
gr.Markdown("## 🔧 Model
|
104 |
|
105 |
model_dropdown = gr.Dropdown(
|
106 |
choices=list(MODELS.keys()),
|
107 |
-
value=list(MODELS.keys())[
|
108 |
label="Select OuteTTS Model",
|
109 |
info="Choose the model variant to use"
|
110 |
)
|
111 |
|
112 |
-
init_button = gr.Button("Initialize Model", variant="primary")
|
113 |
-
init_status = gr.Textbox(label="Status", interactive=False)
|
114 |
-
init_info = gr.Textbox(label="Model Info", interactive=False, lines=3)
|
115 |
-
|
116 |
with gr.Column(scale=1):
|
117 |
gr.Markdown("## 🎵 Speaker Creation")
|
118 |
|
@@ -122,8 +97,6 @@ with gr.Blocks(title="OuteTTS Speaker Creator", theme=gr.themes.Soft()) as demo:
|
|
122 |
sources=["upload", "microphone"]
|
123 |
)
|
124 |
create_button = gr.Button("Create Speaker Profile", variant="primary")
|
125 |
-
|
126 |
-
speaker_status = gr.Textbox(label="Speaker Status", interactive=False)
|
127 |
|
128 |
with gr.Row():
|
129 |
with gr.Column():
|
@@ -156,26 +129,32 @@ with gr.Blocks(title="OuteTTS Speaker Creator", theme=gr.themes.Soft()) as demo:
|
|
156 |
)
|
157 |
|
158 |
generate_button = gr.Button("Generate Sample Audio", variant="primary")
|
159 |
-
generate_status = gr.Textbox(label="Generation Status", interactive=False)
|
160 |
sample_audio = gr.Audio(label="Generated Audio", interactive=False)
|
161 |
|
162 |
# Event handlers
|
163 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
164 |
fn=initialize_interface,
|
165 |
inputs=[model_dropdown],
|
166 |
-
outputs=[
|
167 |
)
|
168 |
|
169 |
create_button.click(
|
170 |
fn=create_speaker_from_audio,
|
171 |
inputs=[audio_upload, interface_state],
|
172 |
-
outputs=[
|
173 |
)
|
174 |
|
175 |
generate_button.click(
|
176 |
fn=generate_sample_audio,
|
177 |
inputs=[test_text, temperature_slider, interface_state, speaker_state],
|
178 |
-
outputs=[
|
179 |
)
|
180 |
|
181 |
demo.launch()
|
|
|
6 |
from typing import Optional, Dict, Any, List, Tuple
|
7 |
|
8 |
# Available OuteTTS models based on the documentation
|
9 |
+
# Map each model's string value (the label offered in the dropdown) to its
# outetts.Models member. Only the members are needed, so iterate the values
# directly instead of unpacking and discarding the member names.
MODELS = {member.value: member for member in outetts.Models.__members__.values()}
|
10 |
|
11 |
+
def initialize_interface(model_name: str) -> Any:
    """Initialize the OuteTTS interface with the selected model.

    Args:
        model_name: A key of ``MODELS`` (the ``value`` of an
            ``outetts.Models`` member), as offered by the model dropdown.

    Returns:
        The ``outetts.Interface`` built from the auto-generated model config.

    Raises:
        gr.Error: If ``model_name`` is not a key of ``MODELS``.
    """
    try:
        model = MODELS[model_name]
    except KeyError:
        # Surface a bad selection as a UI error, like the other handlers do,
        # instead of letting a bare KeyError escape the event callback.
        raise gr.Error(f"Unknown model: {model_name}") from None

    # Configure the model
    config = outetts.ModelConfig.auto_config(
        model=model,
        backend=outetts.Backend.LLAMACPP,
        quantization=outetts.LlamaCppQuantization.FP16,
    )

    # Initialize the interface
    return outetts.Interface(config=config)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
+
def create_speaker_from_audio(audio_file, interface_state) -> Tuple[str, Any]:
    """Create a speaker profile from an uploaded audio file.

    Args:
        audio_file: The audio input handed unchanged to
            ``interface_state.create_speaker``; ``None`` when nothing has
            been provided.
        interface_state: The initialized TTS interface held in UI state, or
            ``None`` when no model has been initialized yet.

    Returns:
        A ``(speaker_json, speaker)`` pair: the speaker profile serialized as
        indented JSON text, and the speaker object itself.

    Raises:
        gr.Error: If the interface or the audio file is missing.
    """
    if interface_state is None:
        raise gr.Error("Please select a model first")

    if audio_file is None:
        raise gr.Error("Please upload an audio file")

    # Create speaker profile from audio
    speaker = interface_state.create_speaker(audio_file)

    # Convert speaker dict to formatted JSON; ensure_ascii=False keeps
    # non-ASCII characters readable instead of \u-escaping them.
    speaker_json = json.dumps(speaker, indent=2, ensure_ascii=False)

    return speaker_json, speaker
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
|
43 |
|
44 |
+
def generate_sample_audio(text: str, temperature: float, interface_state, speaker_state) -> Optional[str]:
    """Generate sample audio using the current speaker.

    Args:
        text: The text to synthesize; must contain non-whitespace characters.
        temperature: Sampling temperature passed to ``outetts.SamplerConfig``.
        interface_state: The initialized TTS interface held in UI state, or
            ``None`` when no model has been initialized yet.
        speaker_state: The speaker profile held in UI state, or ``None`` when
            no speaker has been created yet.

    Returns:
        The path of a ``.wav`` temporary file containing the generated audio.

    Raises:
        gr.Error: If the interface or speaker is missing, or the text is
            empty or whitespace-only.
    """
    if interface_state is None:
        raise gr.Error("Please select a model first")

    if speaker_state is None:
        raise gr.Error("Please create a speaker first")

    # Treat a missing value the same as blank text instead of failing on
    # None.strip().
    if not text or not text.strip():
        raise gr.Error("Please enter text to generate")

    # Generate audio
    output = interface_state.generate(
        config=outetts.GenerationConfig(
            text=text,
            speaker=speaker_state,
            sampler_config=outetts.SamplerConfig(
                temperature=temperature
            ),
        )
    )

    # Save to temporary file. delete=False because the returned path must
    # still exist after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        output.save(f.name)
    return f.name
|
|
|
|
|
|
|
70 |
|
71 |
# Create the Gradio interface
|
72 |
with gr.Blocks(title="OuteTTS Speaker Creator", theme=gr.themes.Soft()) as demo:
|
|
|
79 |
|
80 |
with gr.Row():
|
81 |
with gr.Column(scale=1):
|
82 |
+
gr.Markdown("## 🔧 Model Selection")
|
83 |
|
84 |
model_dropdown = gr.Dropdown(
|
85 |
choices=list(MODELS.keys()),
|
86 |
+
value=list(MODELS.keys())[-1], # Default to last model
|
87 |
label="Select OuteTTS Model",
|
88 |
info="Choose the model variant to use"
|
89 |
)
|
90 |
|
|
|
|
|
|
|
|
|
91 |
with gr.Column(scale=1):
|
92 |
gr.Markdown("## 🎵 Speaker Creation")
|
93 |
|
|
|
97 |
sources=["upload", "microphone"]
|
98 |
)
|
99 |
create_button = gr.Button("Create Speaker Profile", variant="primary")
|
|
|
|
|
100 |
|
101 |
with gr.Row():
|
102 |
with gr.Column():
|
|
|
129 |
)
|
130 |
|
131 |
generate_button = gr.Button("Generate Sample Audio", variant="primary")
|
|
|
132 |
sample_audio = gr.Audio(label="Generated Audio", interactive=False)
|
133 |
|
134 |
# Event handlers
|
135 |
+
# Initialize default model on page load
|
136 |
+
demo.load(
|
137 |
+
fn=initialize_interface,
|
138 |
+
inputs=[model_dropdown],
|
139 |
+
outputs=[interface_state]
|
140 |
+
)
|
141 |
+
|
142 |
+
model_dropdown.change(
|
143 |
fn=initialize_interface,
|
144 |
inputs=[model_dropdown],
|
145 |
+
outputs=[interface_state]
|
146 |
)
|
147 |
|
148 |
create_button.click(
|
149 |
fn=create_speaker_from_audio,
|
150 |
inputs=[audio_upload, interface_state],
|
151 |
+
outputs=[speaker_json, speaker_state]
|
152 |
)
|
153 |
|
154 |
generate_button.click(
|
155 |
fn=generate_sample_audio,
|
156 |
inputs=[test_text, temperature_slider, interface_state, speaker_state],
|
157 |
+
outputs=[sample_audio]
|
158 |
)
|
159 |
|
160 |
demo.launch()
|
requirements.txt
CHANGED
@@ -1,3 +1,2 @@
|
|
1 |
-
llama-cpp-python --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
|
2 |
outetts==0.4.4
|
3 |
gradio
|
|
|
|
|
1 |
outetts==0.4.4
|
2 |
gradio
|