sheikhed committed on
Commit
f959be9
·
verified ·
1 Parent(s): 14d37e3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -39
app.py CHANGED
@@ -55,6 +55,7 @@ def text_to_speech(voice_id, text, session_id):
55
  if response.status_code != 200:
56
  return None
57
 
 
58
  audio_file_path = f'temp_voice_{session_id}.mp3'
59
  with open(audio_file_path, 'wb') as audio_file:
60
  audio_file.write(response.content)
@@ -91,7 +92,7 @@ def lipsync_api_call(video_url, audio_url):
91
 
92
  def check_job_status(job_id):
93
  headers = {"x-api-key": B_KEY}
94
- max_attempts = 30
95
 
96
  for _ in range(max_attempts):
97
  response = requests.get(f"{API_URL}/{job_id}", headers=headers)
@@ -106,27 +107,31 @@ def check_job_status(job_id):
106
  return None
107
 
108
  def get_media_duration(file_path):
 
109
  cmd = ['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', file_path]
110
  result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
111
  return float(result.stdout.strip())
112
 
113
  def combine_audio_video(video_path, audio_path, output_path):
 
114
  video_duration = get_media_duration(video_path)
115
  audio_duration = get_media_duration(audio_path)
116
 
117
  if video_duration > audio_duration:
 
118
  cmd = [
119
  'ffmpeg', '-i', video_path, '-i', audio_path,
120
- '-t', str(audio_duration),
121
  '-map', '0:v', '-map', '1:a',
122
  '-c:v', 'copy', '-c:a', 'aac',
123
  '-y', output_path
124
  ]
125
  else:
126
- loop_count = int(audio_duration // video_duration) + 1
 
127
  cmd = [
128
  'ffmpeg', '-stream_loop', str(loop_count), '-i', video_path, '-i', audio_path,
129
- '-t', str(audio_duration),
130
  '-map', '0:v', '-map', '1:a',
131
  '-c:v', 'copy', '-c:a', 'aac',
132
  '-shortest', '-y', output_path
@@ -134,32 +139,20 @@ def combine_audio_video(video_path, audio_path, output_path):
134
 
135
  subprocess.run(cmd, check=True)
136
 
137
- def process_input(voice, model, text, audio_file, progress=gr.Progress()):
138
- session_id = str(uuid.uuid4())
139
- input_audio_path = None
140
-
141
- if text and audio_file:
142
- return None, "Please choose either text input or audio upload, not both."
143
-
144
- if text:
145
- progress(0, desc="Generating speech...")
146
- input_audio_path = text_to_speech(voice, text, session_id)
147
- if not input_audio_path:
148
- return None, "Failed to generate speech audio."
149
-
150
- elif audio_file:
151
- progress(0, desc="Using uploaded audio...")
152
- input_audio_path = audio_file # Use the file path directly
153
- else:
154
- return None, "Please provide either text or upload an audio file."
155
-
156
  progress(0.2, desc="Processing video...")
157
  video_path = os.path.join("models", model)
158
 
159
  try:
160
  progress(0.3, desc="Uploading files...")
161
  video_url = upload_file(video_path)
162
- audio_url = upload_file(input_audio_path)
163
 
164
  if not video_url or not audio_url:
165
  raise Exception("Failed to upload files")
@@ -190,14 +183,15 @@ def process_input(voice, model, text, audio_file, progress=gr.Progress()):
190
  progress(0.8, desc="Falling back to simple combination...")
191
  try:
192
  output_path = f"output_{session_id}.mp4"
193
- combine_audio_video(video_path, input_audio_path, output_path)
194
  progress(1.0, desc="Complete!")
195
  return output_path, f"Used fallback method. Original error: {str(e)}"
196
  except Exception as fallback_error:
197
  return None, f"All methods failed. Error: {str(fallback_error)}"
198
  finally:
199
- if text and os.path.exists(input_audio_path):
200
- os.remove(input_audio_path)
 
201
 
202
  def create_interface():
203
  voices = get_voices()
@@ -205,30 +199,25 @@ def create_interface():
205
 
206
  with gr.Blocks() as app:
207
  gr.Markdown("# JSON Train")
208
-
209
  with gr.Row():
210
  with gr.Column():
211
- voice_dropdown = gr.Dropdown(choices=[v[0] for v in voices], label="Select Voice", value=voices[0][0] if voices else None)
212
- model_dropdown = gr.Dropdown(choices=models, label="Select Model", value=models[0] if models else None)
213
-
214
- text_input = gr.Textbox(label="Enter Text", lines=3)
215
- audio_input = gr.Audio(label="Upload Audio", type="filepath")
216
-
217
  generate_btn = gr.Button("Generate Video")
218
-
219
  with gr.Column():
220
  video_output = gr.Video(label="Generated Video")
221
  status_output = gr.Textbox(label="Status", interactive=False)
222
 
223
- def on_generate(voice_name, model_name, text, audio_file):
224
  voice_id = next((v[1] for v in voices if v[0] == voice_name), None)
225
  if not voice_id:
226
  return None, "Invalid voice selected."
227
- return process_input(voice_id, model_name, text, audio_file)
228
 
229
  generate_btn.click(
230
  fn=on_generate,
231
- inputs=[voice_dropdown, model_dropdown, text_input, audio_input],
232
  outputs=[video_output, status_output]
233
  )
234
 
@@ -236,4 +225,4 @@ def create_interface():
236
 
237
  if __name__ == "__main__":
238
  app = create_interface()
239
- app.launch()
 
55
  if response.status_code != 200:
56
  return None
57
 
58
+ # Save temporary audio file with session ID
59
  audio_file_path = f'temp_voice_{session_id}.mp3'
60
  with open(audio_file_path, 'wb') as audio_file:
61
  audio_file.write(response.content)
 
92
 
93
  def check_job_status(job_id):
94
  headers = {"x-api-key": B_KEY}
95
+ max_attempts = 30 # Limit the number of attempts
96
 
97
  for _ in range(max_attempts):
98
  response = requests.get(f"{API_URL}/{job_id}", headers=headers)
 
107
  return None
108
 
109
  def get_media_duration(file_path):
110
+ # Fetch media duration using ffprobe
111
  cmd = ['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', file_path]
112
  result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
113
  return float(result.stdout.strip())
114
 
115
  def combine_audio_video(video_path, audio_path, output_path):
116
+ # Get durations of both video and audio
117
  video_duration = get_media_duration(video_path)
118
  audio_duration = get_media_duration(audio_path)
119
 
120
  if video_duration > audio_duration:
121
+ # Trim video to match the audio length
122
  cmd = [
123
  'ffmpeg', '-i', video_path, '-i', audio_path,
124
+ '-t', str(audio_duration), # Trim video to audio duration
125
  '-map', '0:v', '-map', '1:a',
126
  '-c:v', 'copy', '-c:a', 'aac',
127
  '-y', output_path
128
  ]
129
  else:
130
+ # Loop video if it's shorter than audio
131
+ loop_count = int(audio_duration // video_duration) + 1 # Calculate how many times to loop
132
  cmd = [
133
  'ffmpeg', '-stream_loop', str(loop_count), '-i', video_path, '-i', audio_path,
134
+ '-t', str(audio_duration), # Match the duration of the final video with the audio
135
  '-map', '0:v', '-map', '1:a',
136
  '-c:v', 'copy', '-c:a', 'aac',
137
  '-shortest', '-y', output_path
 
139
 
140
  subprocess.run(cmd, check=True)
141
 
142
+ def process_video(voice, model, text, progress=gr.Progress()):
143
+ session_id = str(uuid.uuid4()) # Generate a unique session ID
144
+ progress(0, desc="Generating speech...")
145
+ audio_path = text_to_speech(voice, text, session_id)
146
+ if not audio_path:
147
+ return None, "Failed to generate speech audio."
148
+
 
 
 
 
 
 
 
 
 
 
 
 
149
  progress(0.2, desc="Processing video...")
150
  video_path = os.path.join("models", model)
151
 
152
  try:
153
  progress(0.3, desc="Uploading files...")
154
  video_url = upload_file(video_path)
155
+ audio_url = upload_file(audio_path)
156
 
157
  if not video_url or not audio_url:
158
  raise Exception("Failed to upload files")
 
183
  progress(0.8, desc="Falling back to simple combination...")
184
  try:
185
  output_path = f"output_{session_id}.mp4"
186
+ combine_audio_video(video_path, audio_path, output_path)
187
  progress(1.0, desc="Complete!")
188
  return output_path, f"Used fallback method. Original error: {str(e)}"
189
  except Exception as fallback_error:
190
  return None, f"All methods failed. Error: {str(fallback_error)}"
191
  finally:
192
+ # Cleanup
193
+ if os.path.exists(audio_path):
194
+ os.remove(audio_path)
195
 
196
  def create_interface():
197
  voices = get_voices()
 
199
 
200
  with gr.Blocks() as app:
201
  gr.Markdown("# JSON Train")
 
202
  with gr.Row():
203
  with gr.Column():
204
+ voice_dropdown = gr.Dropdown(choices=[v[0] for v in voices], label="Select", value=voices[0][0] if voices else None)
205
+ model_dropdown = gr.Dropdown(choices=models, label="Select", value=models[0] if models else None)
206
+ text_input = gr.Textbox(label="Enter text", lines=3)
 
 
 
207
  generate_btn = gr.Button("Generate Video")
 
208
  with gr.Column():
209
  video_output = gr.Video(label="Generated Video")
210
  status_output = gr.Textbox(label="Status", interactive=False)
211
 
212
+ def on_generate(voice_name, model_name, text):
213
  voice_id = next((v[1] for v in voices if v[0] == voice_name), None)
214
  if not voice_id:
215
  return None, "Invalid voice selected."
216
+ return process_video(voice_id, model_name, text)
217
 
218
  generate_btn.click(
219
  fn=on_generate,
220
+ inputs=[voice_dropdown, model_dropdown, text_input],
221
  outputs=[video_output, status_output]
222
  )
223
 
 
225
 
226
  if __name__ == "__main__":
227
  app = create_interface()
228
+ app.launch()