hans00 committed on
Commit
ab1b361
·
unverified ·
1 Parent(s): 9097e40

Initial implement

Browse files
Files changed (2) hide show
  1. app.py +181 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import outetts
3
+ import json
4
+ import tempfile
5
+ import os
6
+ from typing import Optional, Dict, Any, List, Tuple
7
+
8
# Selectable OuteTTS models, keyed by each enum member's value and mapping to
# that member's *name* (the key of ``outetts.Models.__members__``).
# NOTE(review): initialize_interface passes the looked-up name straight to
# ModelConfig.auto_config(model=...); confirm that a member name (rather than
# the outetts.Models member itself) is what that call expects.
MODELS = {
    member.value: member_name
    for member_name, member in outetts.Models.__members__.items()
}
10
+
11
def initialize_interface(model_name: str) -> Tuple[str, str, Any]:
    """Build an OuteTTS interface for the model chosen in the UI.

    Args:
        model_name: Key into the module-level ``MODELS`` mapping.

    Returns:
        A ``(status, info, interface)`` tuple. On success ``interface`` is the
        newly created ``outetts.Interface``. On any failure it is ``None`` and
        both strings carry the error text.
    """
    try:
        model = MODELS[model_name]

        # Backend and quantization are fixed here; only the model varies.
        config = outetts.ModelConfig.auto_config(
            model=model,
            backend=outetts.Backend.LLAMACPP,
            quantization=outetts.LlamaCppQuantization.FP16,
        )
        interface = outetts.Interface(config=config)
    except Exception as e:
        # UI boundary: surface the failure in the status boxes instead of
        # letting the exception escape the event handler.
        return f"❌ Error initializing model: {e}", f"Error: {e}", None

    status = f"✅ Successfully initialized {model_name}"
    info = f"Model: {model_name}\nStatus: Ready"
    return status, info, interface
34
+
35
def create_speaker_from_audio(audio_file, interface_state) -> Tuple[str, str, Any]:
    """Create a speaker profile from an uploaded reference recording.

    Args:
        audio_file: Value handed to ``interface_state.create_speaker``; the
            upload widget is configured with ``type="filepath"``. ``None``
            when nothing has been uploaded.
        interface_state: Interface produced by ``initialize_interface``, or
            ``None`` if no model has been initialized yet.

    Returns:
        A ``(status, speaker_json, speaker)`` tuple. ``speaker_json`` is the
        profile pretty-printed as JSON and ``speaker`` is the raw profile.
        When a precondition is missing the JSON is empty; when creation fails
        it holds the error text. In both cases ``speaker`` is ``None``.
    """
    if interface_state is None:
        return "❌ Please initialize a model first", "", None
    if audio_file is None:
        return "❌ Please upload an audio file", "", None

    try:
        speaker = interface_state.create_speaker(audio_file)
        # ensure_ascii=False keeps non-ASCII text readable in the JSON box.
        speaker_json = json.dumps(speaker, indent=2, ensure_ascii=False)
    except Exception as e:
        # UI boundary: report the failure rather than raising.
        return f"❌ Error creating speaker: {e}", f"Error: {e}", None

    return "✅ Speaker profile created successfully!", speaker_json, speaker
57
+
58
+
59
+
60
def generate_sample_audio(text: str, temperature: float, interface_state, speaker_state) -> Tuple[str, Optional[str]]:
    """Synthesize a test clip with the current speaker profile.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only input is
            rejected with a status message.
        temperature: Sampling temperature forwarded to ``outetts.SamplerConfig``.
        interface_state: Initialized interface, or ``None``.
        speaker_state: Speaker profile, or ``None``.

    Returns:
        A ``(status, wav_path)`` tuple. ``wav_path`` is the path of a temporary
        ``.wav`` file on success and ``None`` otherwise.
    """
    if interface_state is None:
        return "❌ Please initialize a model first", None
    if speaker_state is None:
        return "❌ Please create a speaker first", None
    # Guard against None as well as blank text so .strip() cannot raise.
    if not text or not text.strip():
        return "❌ Please enter text to generate", None

    try:
        output = interface_state.generate(
            config=outetts.GenerationConfig(
                text=text,
                speaker=speaker_state,
                sampler_config=outetts.SamplerConfig(temperature=temperature),
            )
        )

        # Only reserve a unique path here and close the handle before saving:
        # a file still held open by NamedTemporaryFile cannot be reopened by
        # name on Windows.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            wav_path = f.name
        try:
            output.save(wav_path)
        except Exception:
            # delete=False means nobody else will clean up a failed save.
            os.remove(wav_path)
            raise
    except Exception as e:
        # UI boundary: report the failure rather than raising.
        return f"❌ Error generating audio: {e}", None

    return "✅ Audio generated successfully!", wav_path
91
+
92
# Build the Gradio UI: model setup and speaker creation side by side, then the
# speaker JSON, then a test-generation panel.
with gr.Blocks(title="OuteTTS Speaker Creator", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎙️ OuteTTS Speaker Creator")
    gr.Markdown("Create and manage speaker profiles for OuteTTS text-to-speech synthesis")

    # State management for multi-user support: the interface and speaker are
    # kept in gr.State rather than in module globals.
    interface_state = gr.State(value=None)
    speaker_state = gr.State(value=None)

    # Computed once and reused for both the choices and the default value.
    model_choices = list(MODELS)

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("## 🔧 Model Configuration")

            model_dropdown = gr.Dropdown(
                choices=model_choices,
                value=model_choices[0],
                label="Select OuteTTS Model",
                info="Choose the model variant to use",
            )

            init_button = gr.Button("Initialize Model", variant="primary")
            init_status = gr.Textbox(label="Status", interactive=False)
            init_info = gr.Textbox(label="Model Info", interactive=False, lines=3)

        with gr.Column(scale=1):
            gr.Markdown("## 🎵 Speaker Creation")

            # type="filepath" makes the handler receive a path on disk.
            audio_upload = gr.Audio(
                label="Upload Reference Audio",
                type="filepath",
                sources=["upload", "microphone"],
            )
            create_button = gr.Button("Create Speaker Profile", variant="primary")

            speaker_status = gr.Textbox(label="Speaker Status", interactive=False)

    with gr.Row():
        with gr.Column():
            gr.Markdown("## 📋 Speaker Profile (JSON)")
            speaker_json = gr.Textbox(
                label="Speaker JSON",
                lines=15,
                max_lines=20,
                interactive=False,
                show_copy_button=True,
            )

    with gr.Row():
        with gr.Column():
            gr.Markdown("## 🎧 Test Audio Generation")
            test_text = gr.Textbox(
                label="Test Text",
                placeholder="Enter text to generate speech...",
                lines=3,
                value="Hello, this is a test of the OuteTTS speaker profile.",
            )

            temperature_slider = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                step=0.1,
                value=0.4,
                label="Temperature",
                info="Controls randomness in generation",
            )

            generate_button = gr.Button("Generate Sample Audio", variant="primary")
            generate_status = gr.Textbox(label="Generation Status", interactive=False)
            sample_audio = gr.Audio(label="Generated Audio", interactive=False)

    # Event handlers: each button feeds its handler's return tuple into the
    # listed output components, in order.
    init_button.click(
        fn=initialize_interface,
        inputs=[model_dropdown],
        outputs=[init_status, init_info, interface_state],
    )

    create_button.click(
        fn=create_speaker_from_audio,
        inputs=[audio_upload, interface_state],
        outputs=[speaker_status, speaker_json, speaker_state],
    )

    generate_button.click(
        fn=generate_sample_audio,
        inputs=[test_text, temperature_slider, interface_state, speaker_state],
        outputs=[generate_status, sample_audio],
    )

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ outetts==0.4.4
2
+ gradio