sheikhed commited on
Commit
355b39c
·
verified ·
1 Parent(s): eed4dc6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +114 -79
app.py CHANGED
@@ -18,6 +18,11 @@ B_KEY = os.getenv("B_KEY")
18
  API_URL = os.getenv("API_URL")
19
  UPLOAD_URL = os.getenv("UPLOAD_URL")
20
 
 
 
 
 
 
21
  def get_voices():
22
  url = "https://api.elevenlabs.io/v1/voices"
23
  headers = {
@@ -55,40 +60,38 @@ def text_to_speech(voice_id, text, session_id):
55
  if response.status_code != 200:
56
  return None
57
 
58
- # Save temporary audio file with session ID
59
- audio_file_path = f'temp_voice_{session_id}.mp3'
60
  with open(audio_file_path, 'wb') as audio_file:
61
  audio_file.write(response.content)
62
  return audio_file_path
63
 
64
- def process_uploaded_audio(audio_file, session_id):
65
  """Process and validate uploaded audio file"""
66
- if audio_file is None:
67
  return None
68
 
69
  # Get the file extension
70
- ext = os.path.splitext(audio_file.name)[1].lower()
71
  if ext not in ['.mp3', '.wav', '.m4a', '.aac']:
72
  return None
73
 
74
- # Save the uploaded file with session ID
75
- audio_file_path = f'temp_voice_{session_id}{ext}'
76
- with open(audio_file_path, 'wb') as f:
77
- f.write(audio_file.read())
78
 
79
  # Convert to mp3 if not already mp3
80
  if ext != '.mp3':
81
- mp3_path = f'temp_voice_{session_id}.mp3'
82
  cmd = [
83
- 'ffmpeg', '-i', audio_file_path,
84
  '-codec:a', 'libmp3lame', '-qscale:a', '2',
85
- '-y', mp3_path
86
  ]
87
  subprocess.run(cmd, check=True)
88
- os.remove(audio_file_path)
89
- return mp3_path
90
-
91
- return audio_file_path
 
 
92
 
93
  def upload_file(file_path):
94
  with open(file_path, 'rb') as file:
@@ -167,64 +170,71 @@ def combine_audio_video(video_path, audio_path, output_path):
167
  def process_video(voice, model, text, audio_file, progress=gr.Progress()):
168
  session_id = str(uuid.uuid4())
169
 
170
- # Handle audio input (either text-to-speech or uploaded file)
171
- if audio_file is not None:
172
- progress(0.1, desc="Processing uploaded audio...")
173
- audio_path = process_uploaded_audio(audio_file, session_id)
174
- if not audio_path:
175
- return None, "Failed to process uploaded audio file."
176
- else:
177
- progress(0.1, desc="Generating speech...")
178
- audio_path = text_to_speech(voice, text, session_id)
179
- if not audio_path:
180
- return None, "Failed to generate speech audio."
181
-
182
- progress(0.2, desc="Processing video...")
183
- video_path = os.path.join("models", model)
184
-
185
  try:
186
- progress(0.3, desc="Uploading files...")
187
- video_url = upload_file(video_path)
188
- audio_url = upload_file(audio_path)
189
-
190
- if not video_url or not audio_url:
191
- raise Exception("Failed to upload files")
192
-
193
- progress(0.4, desc="Initiating lipsync...")
194
- job_data = lipsync_api_call(video_url, audio_url)
195
-
196
- if "error" in job_data or "message" in job_data:
197
- raise Exception(job_data.get("error", job_data.get("message", "Unknown error")))
198
-
199
- job_id = job_data["id"]
200
 
201
- progress(0.5, desc="Processing lipsync...")
202
- result_url = check_job_status(job_id)
203
 
204
- if result_url:
205
- progress(0.9, desc="Downloading result...")
206
- response = requests.get(result_url)
207
- output_path = f"output_{session_id}.mp4"
208
- with open(output_path, "wb") as f:
209
- f.write(response.content)
210
- progress(1.0, desc="Complete!")
211
- return output_path, "Lipsync completed successfully!"
212
- else:
213
- raise Exception("Lipsync processing failed or timed out")
214
-
215
- except Exception as e:
216
- progress(0.8, desc="Falling back to simple combination...")
217
  try:
218
- output_path = f"output_{session_id}.mp4"
219
- combine_audio_video(video_path, audio_path, output_path)
220
- progress(1.0, desc="Complete!")
221
- return output_path, f"Used fallback method. Original error: {str(e)}"
222
- except Exception as fallback_error:
223
- return None, f"All methods failed. Error: {str(fallback_error)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
  finally:
225
- # Cleanup
226
- if os.path.exists(audio_path):
227
- os.remove(audio_path)
 
 
 
 
228
 
229
  def create_interface():
230
  voices = get_voices()
@@ -232,9 +242,16 @@ def create_interface():
232
 
233
  with gr.Blocks() as app:
234
  gr.Markdown("# JSON Train")
 
235
  with gr.Row():
236
  with gr.Column():
237
- with gr.Tab("Text to Speech"):
 
 
 
 
 
 
238
  voice_dropdown = gr.Dropdown(
239
  choices=[v[0] for v in voices],
240
  label="Select Voice",
@@ -242,10 +259,11 @@ def create_interface():
242
  )
243
  text_input = gr.Textbox(label="Enter text", lines=3)
244
 
245
- with gr.Tab("Upload Audio"):
246
- audio_input = gr.File(
247
- label="Upload Audio File",
248
- file_types=["audio/*"]
 
249
  )
250
 
251
  model_dropdown = gr.Dropdown(
@@ -259,15 +277,32 @@ def create_interface():
259
  video_output = gr.Video(label="Generated Video")
260
  status_output = gr.Textbox(label="Status", interactive=False)
261
 
262
- def on_generate(voice_name, model_name, text, audio_file):
 
 
 
 
 
 
 
 
 
 
 
 
263
  voice_id = next((v[1] for v in voices if v[0] == voice_name), None)
264
- if not voice_id:
265
- return None, "Invalid voice selected."
266
- return process_video(voice_id, model_name, text, audio_file)
 
 
 
 
 
267
 
268
  generate_btn.click(
269
  fn=on_generate,
270
- inputs=[voice_dropdown, model_dropdown, text_input, audio_input],
271
  outputs=[video_output, status_output]
272
  )
273
 
 
18
  API_URL = os.getenv("API_URL")
19
  UPLOAD_URL = os.getenv("UPLOAD_URL")
20
 
21
+ # Create temp directory if it doesn't exist
22
+ TEMP_DIR = "temp"
23
+ if not os.path.exists(TEMP_DIR):
24
+ os.makedirs(TEMP_DIR)
25
+
26
  def get_voices():
27
  url = "https://api.elevenlabs.io/v1/voices"
28
  headers = {
 
60
  if response.status_code != 200:
61
  return None
62
 
63
+ audio_file_path = os.path.join(TEMP_DIR, f'temp_voice_{session_id}.mp3')
 
64
  with open(audio_file_path, 'wb') as audio_file:
65
  audio_file.write(response.content)
66
  return audio_file_path
67
 
68
+ def process_uploaded_audio(audio_path, session_id):
69
  """Process and validate uploaded audio file"""
70
+ if not audio_path:
71
  return None
72
 
73
  # Get the file extension
74
+ ext = os.path.splitext(audio_path)[1].lower()
75
  if ext not in ['.mp3', '.wav', '.m4a', '.aac']:
76
  return None
77
 
78
+ # Create output path
79
+ output_path = os.path.join(TEMP_DIR, f'temp_voice_{session_id}.mp3')
 
 
80
 
81
  # Convert to mp3 if not already mp3
82
  if ext != '.mp3':
 
83
  cmd = [
84
+ 'ffmpeg', '-i', audio_path,
85
  '-codec:a', 'libmp3lame', '-qscale:a', '2',
86
+ '-y', output_path
87
  ]
88
  subprocess.run(cmd, check=True)
89
+ return output_path
90
+ else:
91
+ # If it's already MP3, just copy it to temp directory
92
+ with open(audio_path, 'rb') as src, open(output_path, 'wb') as dst:
93
+ dst.write(src.read())
94
+ return output_path
95
 
96
  def upload_file(file_path):
97
  with open(file_path, 'rb') as file:
 
170
  def process_video(voice, model, text, audio_file, progress=gr.Progress()):
171
  session_id = str(uuid.uuid4())
172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  try:
174
+ # Handle audio input (either text-to-speech or uploaded file)
175
+ if audio_file is not None:
176
+ progress(0.1, desc="Processing uploaded audio...")
177
+ audio_path = process_uploaded_audio(audio_file.name, session_id)
178
+ if not audio_path:
179
+ return None, "Failed to process uploaded audio file."
180
+ elif text:
181
+ progress(0.1, desc="Generating speech...")
182
+ audio_path = text_to_speech(voice, text, session_id)
183
+ if not audio_path:
184
+ return None, "Failed to generate speech audio."
185
+ else:
186
+ return None, "Please either enter text or upload an audio file."
 
187
 
188
+ progress(0.2, desc="Processing video...")
189
+ video_path = os.path.join("models", model)
190
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  try:
192
+ progress(0.3, desc="Uploading files...")
193
+ video_url = upload_file(video_path)
194
+ audio_url = upload_file(audio_path)
195
+
196
+ if not video_url or not audio_url:
197
+ raise Exception("Failed to upload files")
198
+
199
+ progress(0.4, desc="Initiating lipsync...")
200
+ job_data = lipsync_api_call(video_url, audio_url)
201
+
202
+ if "error" in job_data or "message" in job_data:
203
+ raise Exception(job_data.get("error", job_data.get("message", "Unknown error")))
204
+
205
+ job_id = job_data["id"]
206
+
207
+ progress(0.5, desc="Processing lipsync...")
208
+ result_url = check_job_status(job_id)
209
+
210
+ if result_url:
211
+ progress(0.9, desc="Downloading result...")
212
+ response = requests.get(result_url)
213
+ output_path = os.path.join(TEMP_DIR, f"output_{session_id}.mp4")
214
+ with open(output_path, "wb") as f:
215
+ f.write(response.content)
216
+ progress(1.0, desc="Complete!")
217
+ return output_path, "Lipsync completed successfully!"
218
+ else:
219
+ raise Exception("Lipsync processing failed or timed out")
220
+
221
+ except Exception as e:
222
+ progress(0.8, desc="Falling back to simple combination...")
223
+ try:
224
+ output_path = os.path.join(TEMP_DIR, f"output_{session_id}.mp4")
225
+ combine_audio_video(video_path, audio_path, output_path)
226
+ progress(1.0, desc="Complete!")
227
+ return output_path, f"Used fallback method. Original error: {str(e)}"
228
+ except Exception as fallback_error:
229
+ return None, f"All methods failed. Error: {str(fallback_error)}"
230
  finally:
231
+ # Cleanup temp files
232
+ for temp_file in os.listdir(TEMP_DIR):
233
+ if session_id in temp_file:
234
+ try:
235
+ os.remove(os.path.join(TEMP_DIR, temp_file))
236
+ except:
237
+ pass
238
 
239
  def create_interface():
240
  voices = get_voices()
 
242
 
243
  with gr.Blocks() as app:
244
  gr.Markdown("# JSON Train")
245
+
246
  with gr.Row():
247
  with gr.Column():
248
+ input_type = gr.Radio(
249
+ choices=["Text to Speech", "Upload Audio"],
250
+ label="Input Type",
251
+ value="Text to Speech"
252
+ )
253
+
254
+ with gr.Group() as tts_group:
255
  voice_dropdown = gr.Dropdown(
256
  choices=[v[0] for v in voices],
257
  label="Select Voice",
 
259
  )
260
  text_input = gr.Textbox(label="Enter text", lines=3)
261
 
262
+ with gr.Group() as audio_group:
263
+ audio_input = gr.Audio(
264
+ label="Upload Audio",
265
+ source="upload",
266
+ type="filepath"
267
  )
268
 
269
  model_dropdown = gr.Dropdown(
 
277
  video_output = gr.Video(label="Generated Video")
278
  status_output = gr.Textbox(label="Status", interactive=False)
279
 
280
+ def toggle_input_groups(choice):
281
+ if choice == "Text to Speech":
282
+ return gr.Group.update(visible=True), gr.Group.update(visible=False)
283
+ else:
284
+ return gr.Group.update(visible=False), gr.Group.update(visible=True)
285
+
286
+ input_type.change(
287
+ toggle_input_groups,
288
+ inputs=[input_type],
289
+ outputs=[tts_group, audio_group]
290
+ )
291
+
292
+ def on_generate(input_choice, voice_name, model_name, text, audio_file):
293
  voice_id = next((v[1] for v in voices if v[0] == voice_name), None)
294
+ if input_choice == "Text to Speech":
295
+ if not text:
296
+ return None, "Please enter some text."
297
+ return process_video(voice_id, model_name, text, None)
298
+ else:
299
+ if not audio_file:
300
+ return None, "Please upload an audio file."
301
+ return process_video(voice_id, model_name, None, audio_file)
302
 
303
  generate_btn.click(
304
  fn=on_generate,
305
+ inputs=[input_type, voice_dropdown, model_dropdown, text_input, audio_input],
306
  outputs=[video_output, status_output]
307
  )
308