Update app.py
Browse files
app.py
CHANGED
@@ -61,25 +61,6 @@ def text_to_speech(voice_id, text, session_id):
|
|
61 |
audio_file.write(response.content)
|
62 |
return audio_file_path
|
63 |
|
64 |
-
def save_uploaded_audio(audio_file, session_id):
    """Copy an uploaded audio file to a per-session temporary path.

    Args:
        audio_file: Path of the uploaded audio file as a string. ``None``
            or any other non-string value means there is nothing to save.
        session_id: Identifier embedded in the output file name.

    Returns:
        The relative path ``temp_voice_<session_id><ext>`` of the copy,
        where ``<ext>`` is the source file's extension (``.mp3`` when the
        source has none), or ``None`` when *audio_file* is not a string.

    Raises:
        OSError: If the source cannot be read or the copy cannot be written.
    """
    # Imported locally so the function does not rely on a module-level import.
    import shutil

    # Covers both the "no upload" case (None) and unsupported value types.
    if not isinstance(audio_file, str):
        return None

    ext = os.path.splitext(audio_file)[1] or '.mp3'
    output_path = f'temp_voice_{session_id}{ext}'

    # copyfile streams the data in chunks instead of loading the whole
    # upload into memory, and closes both files even if the copy fails.
    shutil.copyfile(audio_file, output_path)
    return output_path
|
82 |
-
|
83 |
def upload_file(file_path):
|
84 |
with open(file_path, 'rb') as file:
|
85 |
files = {'fileToUpload': (os.path.basename(file_path), file)}
|
@@ -154,20 +135,22 @@ def combine_audio_video(video_path, audio_path, output_path):
|
|
154 |
|
155 |
subprocess.run(cmd, check=True)
|
156 |
|
157 |
-
def process_video(
|
158 |
session_id = str(uuid.uuid4())
|
159 |
|
160 |
-
# Handle audio based on input type
|
161 |
if input_type == "text":
|
162 |
progress(0, desc="Generating speech...")
|
163 |
-
audio_path = text_to_speech(
|
164 |
if not audio_path:
|
165 |
return None, "Failed to generate speech audio."
|
166 |
else: # audio upload
|
167 |
progress(0, desc="Processing uploaded audio...")
|
168 |
-
|
169 |
-
|
170 |
-
|
|
|
|
|
171 |
|
172 |
progress(0.2, desc="Processing video...")
|
173 |
video_path = os.path.join("models", model)
|
@@ -222,37 +205,35 @@ def create_interface():
|
|
222 |
|
223 |
with gr.Blocks() as app:
|
224 |
gr.Markdown("# JSON Train")
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
with gr.Column():
|
233 |
-
# Text-to-speech inputs
|
234 |
-
with gr.Column(visible=True) as text_inputs:
|
235 |
-
voice_dropdown = gr.Dropdown(
|
236 |
-
choices=[v[0] for v in voices],
|
237 |
-
label="Select Voice",
|
238 |
-
value=voices[0][0] if voices else None
|
239 |
)
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
245 |
|
246 |
-
|
247 |
-
|
248 |
-
label="
|
249 |
-
value=models[0] if models else None
|
250 |
-
)
|
251 |
-
generate_btn = gr.Button("Generate Video")
|
252 |
-
|
253 |
-
with gr.Column():
|
254 |
-
video_output = gr.Video(label="Generated Video")
|
255 |
-
status_output = gr.Textbox(label="Status", interactive=False)
|
256 |
|
257 |
def toggle_inputs(input_type):
|
258 |
return (
|
@@ -267,9 +248,13 @@ def create_interface():
|
|
267 |
)
|
268 |
|
269 |
def on_generate(voice_name, model_name, text, audio_file, input_type):
|
270 |
-
|
271 |
-
|
272 |
-
|
|
|
|
|
|
|
|
|
273 |
return process_video(voice_id, model_name, text, audio_file, input_type)
|
274 |
|
275 |
generate_btn.click(
|
|
|
61 |
audio_file.write(response.content)
|
62 |
return audio_file_path
|
63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
def upload_file(file_path):
|
65 |
with open(file_path, 'rb') as file:
|
66 |
files = {'fileToUpload': (os.path.basename(file_path), file)}
|
|
|
135 |
|
136 |
subprocess.run(cmd, check=True)
|
137 |
|
138 |
+
def process_video(voice_id, model, text, audio_file, input_type, progress=gr.Progress()):
|
139 |
session_id = str(uuid.uuid4())
|
140 |
|
141 |
+
# Handle audio source based on input type
|
142 |
if input_type == "text":
|
143 |
progress(0, desc="Generating speech...")
|
144 |
+
audio_path = text_to_speech(voice_id, text, session_id)
|
145 |
if not audio_path:
|
146 |
return None, "Failed to generate speech audio."
|
147 |
else: # audio upload
|
148 |
progress(0, desc="Processing uploaded audio...")
|
149 |
+
# Save uploaded audio to temporary file
|
150 |
+
temp_audio_path = f'temp_voice_{session_id}.mp3'
|
151 |
+
with open(temp_audio_path, 'wb') as f:
|
152 |
+
f.write(open(audio_file, 'rb').read())
|
153 |
+
audio_path = temp_audio_path
|
154 |
|
155 |
progress(0.2, desc="Processing video...")
|
156 |
video_path = os.path.join("models", model)
|
|
|
205 |
|
206 |
with gr.Blocks() as app:
|
207 |
gr.Markdown("# JSON Train")
|
208 |
+
with gr.Row():
|
209 |
+
with gr.Column():
|
210 |
+
input_type = gr.Radio(
|
211 |
+
choices=["text", "audio"],
|
212 |
+
label="Input Type",
|
213 |
+
value="text"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
214 |
)
|
215 |
+
|
216 |
+
with gr.Column(visible=True) as text_inputs:
|
217 |
+
voice_dropdown = gr.Dropdown(
|
218 |
+
choices=[v[0] for v in voices],
|
219 |
+
label="Select Voice",
|
220 |
+
value=voices[0][0] if voices else None
|
221 |
+
)
|
222 |
+
text_input = gr.Textbox(label="Enter text", lines=3)
|
223 |
+
|
224 |
+
with gr.Column(visible=False) as audio_inputs:
|
225 |
+
audio_upload = gr.Audio(label="Upload Audio", type="filepath")
|
226 |
+
|
227 |
+
model_dropdown = gr.Dropdown(
|
228 |
+
choices=models,
|
229 |
+
label="Select Video Model",
|
230 |
+
value=models[0] if models else None
|
231 |
+
)
|
232 |
+
generate_btn = gr.Button("Generate Video")
|
233 |
|
234 |
+
with gr.Column():
|
235 |
+
video_output = gr.Video(label="Generated Video")
|
236 |
+
status_output = gr.Textbox(label="Status", interactive=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
237 |
|
238 |
def toggle_inputs(input_type):
|
239 |
return (
|
|
|
248 |
)
|
249 |
|
250 |
def on_generate(voice_name, model_name, text, audio_file, input_type):
    """Validate the form inputs and start video generation.

    Args:
        voice_name: Display name chosen in the voice dropdown; only used
            when *input_type* is ``"text"``.
        model_name: Video model selected in the model dropdown.
        text: Text to synthesize when *input_type* is ``"text"``.
        audio_file: Path of the uploaded audio file, or ``None`` when
            nothing was uploaded; only used for audio input.
        input_type: ``"text"`` for text-to-speech; any other value is
            treated as an audio upload.

    Returns:
        ``(None, message)`` when validation fails, otherwise whatever
        ``process_video`` returns.
    """
    if input_type == "text":
        # Each entry of ``voices`` is indexed as (display name, voice id).
        voice_id = next((v[1] for v in voices if v[0] == voice_name), None)
        if not voice_id:
            return None, "Invalid voice selected."
    else:
        # Without this guard a missing upload would reach process_video,
        # which opens the path directly and fails on None.
        if not audio_file:
            return None, "No audio file uploaded."
        voice_id = None  # Not needed for audio upload

    return process_video(voice_id, model_name, text, audio_file, input_type)
|
259 |
|
260 |
generate_btn.click(
|