sheikhed committed on
Commit
d52bea3
·
verified ·
1 Parent(s): 84501e2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -120
app.py CHANGED
@@ -18,11 +18,6 @@ B_KEY = os.getenv("B_KEY")
18
  API_URL = os.getenv("API_URL")
19
  UPLOAD_URL = os.getenv("UPLOAD_URL")
20
 
21
- # Create temp directory if it doesn't exist
22
- TEMP_DIR = "temp"
23
- if not os.path.exists(TEMP_DIR):
24
- os.makedirs(TEMP_DIR)
25
-
26
  def get_voices():
27
  url = "https://api.elevenlabs.io/v1/voices"
28
  headers = {
@@ -60,38 +55,32 @@ def text_to_speech(voice_id, text, session_id):
60
  if response.status_code != 200:
61
  return None
62
 
63
- audio_file_path = os.path.join(TEMP_DIR, f'temp_voice_{session_id}.mp3')
 
64
  with open(audio_file_path, 'wb') as audio_file:
65
  audio_file.write(response.content)
66
  return audio_file_path
67
 
68
- def process_uploaded_audio(audio_path, session_id):
69
- """Process and validate uploaded audio file"""
70
- if not audio_path:
71
  return None
72
 
73
- # Get the file extension
74
- ext = os.path.splitext(audio_path)[1].lower()
75
- if ext not in ['.mp3', '.wav', '.m4a', '.aac']:
76
- return None
77
 
78
- # Create output path
79
- output_path = os.path.join(TEMP_DIR, f'temp_voice_{session_id}.mp3')
 
 
 
 
 
 
 
80
 
81
- # Convert to mp3 if not already mp3
82
- if ext != '.mp3':
83
- cmd = [
84
- 'ffmpeg', '-i', audio_path,
85
- '-codec:a', 'libmp3lame', '-qscale:a', '2',
86
- '-y', output_path
87
- ]
88
- subprocess.run(cmd, check=True)
89
- return output_path
90
- else:
91
- # If it's already MP3, just copy it to temp directory
92
- with open(audio_path, 'rb') as src, open(output_path, 'wb') as dst:
93
- dst.write(src.read())
94
- return output_path
95
 
96
  def upload_file(file_path):
97
  with open(file_path, 'rb') as file:
@@ -124,7 +113,7 @@ def lipsync_api_call(video_url, audio_url):
124
 
125
  def check_job_status(job_id):
126
  headers = {"x-api-key": B_KEY}
127
- max_attempts = 30
128
 
129
  for _ in range(max_attempts):
130
  response = requests.get(f"{API_URL}/{job_id}", headers=headers)
@@ -167,74 +156,67 @@ def combine_audio_video(video_path, audio_path, output_path):
167
 
168
  subprocess.run(cmd, check=True)
169
 
170
- def process_video(voice, model, text, audio_file, progress=gr.Progress()):
171
  session_id = str(uuid.uuid4())
172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  try:
174
- # Handle audio input (either text-to-speech or uploaded file)
175
- if audio_file is not None:
176
- progress(0.1, desc="Processing uploaded audio...")
177
- audio_path = process_uploaded_audio(audio_file, session_id)
178
- if not audio_path:
179
- return None, "Failed to process uploaded audio file."
180
- elif text:
181
- progress(0.1, desc="Generating speech...")
182
- audio_path = text_to_speech(voice, text, session_id)
183
- if not audio_path:
184
- return None, "Failed to generate speech audio."
185
- else:
186
- return None, "Please either enter text or upload an audio file."
187
 
188
- progress(0.2, desc="Processing video...")
189
- video_path = os.path.join("models", model)
190
 
191
- try:
192
- progress(0.3, desc="Uploading files...")
193
- video_url = upload_file(video_path)
194
- audio_url = upload_file(audio_path)
195
-
196
- if not video_url or not audio_url:
197
- raise Exception("Failed to upload files")
198
-
199
- progress(0.4, desc="Initiating lipsync...")
200
- job_data = lipsync_api_call(video_url, audio_url)
201
-
202
- if "error" in job_data or "message" in job_data:
203
- raise Exception(job_data.get("error", job_data.get("message", "Unknown error")))
204
-
205
- job_id = job_data["id"]
206
-
207
- progress(0.5, desc="Processing lipsync...")
208
- result_url = check_job_status(job_id)
 
 
 
209
 
210
- if result_url:
211
- progress(0.9, desc="Downloading result...")
212
- response = requests.get(result_url)
213
- output_path = os.path.join(TEMP_DIR, f"output_{session_id}.mp4")
214
- with open(output_path, "wb") as f:
215
- f.write(response.content)
216
- progress(1.0, desc="Complete!")
217
- return output_path, "Lipsync completed successfully!"
218
- else:
219
- raise Exception("Lipsync processing failed or timed out")
220
-
221
- except Exception as e:
222
- progress(0.8, desc="Falling back to simple combination...")
223
- try:
224
- output_path = os.path.join(TEMP_DIR, f"output_{session_id}.mp4")
225
- combine_audio_video(video_path, audio_path, output_path)
226
- progress(1.0, desc="Complete!")
227
- return output_path, f"Used fallback method. Original error: {str(e)}"
228
- except Exception as fallback_error:
229
- return None, f"All methods failed. Error: {str(fallback_error)}"
230
  finally:
231
- # Cleanup temp files
232
- for temp_file in os.listdir(TEMP_DIR):
233
- if session_id in temp_file:
234
- try:
235
- os.remove(os.path.join(TEMP_DIR, temp_file))
236
- except:
237
- pass
238
 
239
  def create_interface():
240
  voices = get_voices()
@@ -242,29 +224,26 @@ def create_interface():
242
 
243
  with gr.Blocks() as app:
244
  gr.Markdown("# JSON Train")
245
-
246
  with gr.Row():
247
  with gr.Column():
248
  input_type = gr.Radio(
249
- choices=["Text to Speech", "Upload Audio"],
250
  label="Input Type",
251
- value="Text to Speech"
252
  )
253
 
254
- with gr.Group() as tts_group:
 
255
  voice_dropdown = gr.Dropdown(
256
- choices=[v[0] for v in voices],
257
- label="Select Voice",
258
  value=voices[0][0] if voices else None
259
  )
260
  text_input = gr.Textbox(label="Enter text", lines=3)
261
 
262
- with gr.Group() as audio_group:
263
- audio_input = gr.Audio(
264
- label="Upload Audio",
265
- type="filepath",
266
- format="mp3"
267
- )
268
 
269
  model_dropdown = gr.Dropdown(
270
  choices=models,
@@ -277,32 +256,27 @@ def create_interface():
277
  video_output = gr.Video(label="Generated Video")
278
  status_output = gr.Textbox(label="Status", interactive=False)
279
 
280
- def toggle_input_groups(choice):
281
- if choice == "Text to Speech":
282
- return gr.Group.update(visible=True), gr.Group.update(visible=False)
283
- else:
284
- return gr.Group.update(visible=False), gr.Group.update(visible=True)
285
 
286
  input_type.change(
287
- toggle_input_groups,
288
  inputs=[input_type],
289
- outputs=[tts_group, audio_group]
290
  )
291
 
292
- def on_generate(input_choice, voice_name, model_name, text, audio_file):
293
  voice_id = next((v[1] for v in voices if v[0] == voice_name), None)
294
- if input_choice == "Text to Speech":
295
- if not text:
296
- return None, "Please enter some text."
297
- return process_video(voice_id, model_name, text, None)
298
- else:
299
- if not audio_file:
300
- return None, "Please upload an audio file."
301
- return process_video(voice_id, model_name, None, audio_file)
302
 
303
  generate_btn.click(
304
  fn=on_generate,
305
- inputs=[input_type, voice_dropdown, model_dropdown, text_input, audio_input],
306
  outputs=[video_output, status_output]
307
  )
308
 
 
18
  API_URL = os.getenv("API_URL")
19
  UPLOAD_URL = os.getenv("UPLOAD_URL")
20
 
 
 
 
 
 
21
  def get_voices():
22
  url = "https://api.elevenlabs.io/v1/voices"
23
  headers = {
 
55
  if response.status_code != 200:
56
  return None
57
 
58
+ # Save temporary audio file with session ID
59
+ audio_file_path = f'temp_voice_{session_id}.mp3'
60
  with open(audio_file_path, 'wb') as audio_file:
61
  audio_file.write(response.content)
62
  return audio_file_path
63
 
64
def save_uploaded_audio(audio_file, session_id):
    """Copy an uploaded audio clip to a session-scoped file in the working directory.

    Args:
        audio_file: Either a filesystem path (``str`` / ``os.PathLike``) or a
            readable, seekable binary file-like object. ``None`` means nothing
            was uploaded.
        session_id: Identifier embedded in the output filename so that
            different requests write to different files.

    Returns:
        The relative path ``temp_voice_<session_id><ext>`` of the copy, or
        ``None`` when ``audio_file`` is ``None``.

    Raises:
        OSError: If the source cannot be read or the copy cannot be written.
    """
    if audio_file is None:
        return None

    # Decide how to treat the input *before* touching any attributes: a plain
    # path string has no ``.name``, so reading it unconditionally would raise.
    is_path = isinstance(audio_file, (str, os.PathLike))
    if is_path:
        source_name = os.fspath(audio_file)
    else:
        source_name = getattr(audio_file, "name", "")
        # Some file objects expose a non-string name (e.g. a file descriptor).
        if not isinstance(source_name, str):
            source_name = ""

    # Keep the original extension; fall back to .mp3 if none is found.
    _, ext = os.path.splitext(source_name)
    if not ext:
        ext = '.mp3'

    audio_file_path = f'temp_voice_{session_id}{ext}'

    # Read the payload first so a failing source never leaves an empty
    # destination file behind.
    if is_path:
        with open(audio_file, 'rb') as source:
            payload = source.read()
    else:
        audio_file.seek(0)
        payload = audio_file.read()

    with open(audio_file_path, 'wb') as f:
        f.write(payload)

    return audio_file_path
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
  def upload_file(file_path):
86
  with open(file_path, 'rb') as file:
 
113
 
114
  def check_job_status(job_id):
115
  headers = {"x-api-key": B_KEY}
116
+ max_attempts = 30 # Limit the number of attempts
117
 
118
  for _ in range(max_attempts):
119
  response = requests.get(f"{API_URL}/{job_id}", headers=headers)
 
156
 
157
  subprocess.run(cmd, check=True)
158
 
159
def process_video(voice, model, text, audio_file, input_type, progress=gr.Progress()):
    """Produce a lipsynced video from a model clip and either text or uploaded audio.

    The audio track is obtained first (text-to-speech when ``input_type`` is
    ``"text"``, otherwise the uploaded file). The model video and the audio
    are then uploaded and submitted to the lipsync API. If any step of the
    remote pipeline fails, the function falls back to combining the audio and
    video locally via ``combine_audio_video``.

    Args:
        voice: Voice identifier forwarded to ``text_to_speech``.
        model: Filename of the source video inside the ``models`` directory.
        text: Text to synthesize; only used when ``input_type == "text"``.
        audio_file: Uploaded audio; only used when ``input_type != "text"``.
        input_type: ``"text"`` for text-to-speech, anything else for upload.
        progress: Gradio progress callback.

    Returns:
        A ``(output_path, status_message)`` tuple. ``output_path`` is ``None``
        when no video could be produced.
    """
    session_id = str(uuid.uuid4())

    # Handle audio based on input type
    if input_type == "text":
        # Reject empty input up front instead of spending an API call on it.
        if not text or not text.strip():
            return None, "Please enter some text."
        progress(0, desc="Generating speech...")
        audio_path = text_to_speech(voice, text, session_id)
        if not audio_path:
            return None, "Failed to generate speech audio."
    else:  # audio upload
        progress(0, desc="Processing uploaded audio...")
        audio_path = save_uploaded_audio(audio_file, session_id)
        if not audio_path:
            return None, "Failed to process uploaded audio."

    progress(0.2, desc="Processing video...")
    video_path = os.path.join("models", model)

    try:
        progress(0.3, desc="Uploading files...")
        video_url = upload_file(video_path)
        audio_url = upload_file(audio_path)

        if not video_url or not audio_url:
            raise Exception("Failed to upload files")

        progress(0.4, desc="Initiating lipsync...")
        job_data = lipsync_api_call(video_url, audio_url)

        # The presence of either key is treated as a failure report.
        if "error" in job_data or "message" in job_data:
            raise Exception(job_data.get("error", job_data.get("message", "Unknown error")))

        job_id = job_data["id"]

        progress(0.5, desc="Processing lipsync...")
        result_url = check_job_status(job_id)

        if result_url:
            progress(0.9, desc="Downloading result...")
            response = requests.get(result_url)
            # Without this check an HTTP error body would be saved as the
            # .mp4 and reported as success; raising routes to the fallback.
            response.raise_for_status()
            output_path = f"output_{session_id}.mp4"
            with open(output_path, "wb") as f:
                f.write(response.content)
            progress(1.0, desc="Complete!")
            return output_path, "Lipsync completed successfully!"
        else:
            raise Exception("Lipsync processing failed or timed out")

    except Exception as e:
        # Any failure in the remote pipeline degrades to a local mux of the
        # original video with the audio track.
        progress(0.8, desc="Falling back to simple combination...")
        try:
            output_path = f"output_{session_id}.mp4"
            combine_audio_video(video_path, audio_path, output_path)
            progress(1.0, desc="Complete!")
            return output_path, f"Used fallback method. Original error: {str(e)}"
        except Exception as fallback_error:
            return None, f"All methods failed. Error: {str(fallback_error)}"
    finally:
        # Cleanup: the session audio file is only an intermediate artifact.
        if os.path.exists(audio_path):
            os.remove(audio_path)
 
 
 
 
220
 
221
  def create_interface():
222
  voices = get_voices()
 
224
 
225
  with gr.Blocks() as app:
226
  gr.Markdown("# JSON Train")
 
227
  with gr.Row():
228
  with gr.Column():
229
  input_type = gr.Radio(
230
+ choices=["text", "audio"],
231
  label="Input Type",
232
+ value="text"
233
  )
234
 
235
+ # Text-to-speech inputs
236
+ with gr.Group() as text_inputs:
237
  voice_dropdown = gr.Dropdown(
238
+ choices=[v[0] for v in voices],
239
+ label="Select Voice",
240
  value=voices[0][0] if voices else None
241
  )
242
  text_input = gr.Textbox(label="Enter text", lines=3)
243
 
244
+ # Audio upload input
245
+ with gr.Group() as audio_inputs:
246
+ audio_upload = gr.Audio(label="Upload Audio", type="filepath")
 
 
 
247
 
248
  model_dropdown = gr.Dropdown(
249
  choices=models,
 
256
  video_output = gr.Video(label="Generated Video")
257
  status_output = gr.Textbox(label="Status", interactive=False)
258
 
259
+ def toggle_inputs(input_type):
260
+ return (
261
+ gr.Group.update(visible=(input_type == "text")),
262
+ gr.Group.update(visible=(input_type == "audio"))
263
+ )
264
 
265
  input_type.change(
266
+ fn=toggle_inputs,
267
  inputs=[input_type],
268
+ outputs=[text_inputs, audio_inputs]
269
  )
270
 
271
+ def on_generate(voice_name, model_name, text, audio_file, input_type):
272
  voice_id = next((v[1] for v in voices if v[0] == voice_name), None)
273
+ if input_type == "text" and not voice_id:
274
+ return None, "Invalid voice selected."
275
+ return process_video(voice_id, model_name, text, audio_file, input_type)
 
 
 
 
 
276
 
277
  generate_btn.click(
278
  fn=on_generate,
279
+ inputs=[voice_dropdown, model_dropdown, text_input, audio_upload, input_type],
280
  outputs=[video_output, status_output]
281
  )
282