sheikhed committed on
Commit eed4dc6 · verified · 1 Parent(s): a7c9b9d

Update app.py

Files changed (1): app.py (+71 -21)
app.py CHANGED
@@ -61,6 +61,35 @@ def text_to_speech(voice_id, text, session_id):
         audio_file.write(response.content)
     return audio_file_path
 
+def process_uploaded_audio(audio_file, session_id):
+    """Process and validate uploaded audio file"""
+    if audio_file is None:
+        return None
+
+    # Get the file extension
+    ext = os.path.splitext(audio_file.name)[1].lower()
+    if ext not in ['.mp3', '.wav', '.m4a', '.aac']:
+        return None
+
+    # Save the uploaded file with session ID
+    audio_file_path = f'temp_voice_{session_id}{ext}'
+    with open(audio_file_path, 'wb') as f:
+        f.write(audio_file.read())
+
+    # Convert to mp3 if not already mp3
+    if ext != '.mp3':
+        mp3_path = f'temp_voice_{session_id}.mp3'
+        cmd = [
+            'ffmpeg', '-i', audio_file_path,
+            '-codec:a', 'libmp3lame', '-qscale:a', '2',
+            '-y', mp3_path
+        ]
+        subprocess.run(cmd, check=True)
+        os.remove(audio_file_path)
+        return mp3_path
+
+    return audio_file_path
+
 def upload_file(file_path):
     with open(file_path, 'rb') as file:
         files = {'fileToUpload': (os.path.basename(file_path), file)}
@@ -92,7 +121,7 @@ def lipsync_api_call(video_url, audio_url):
 
 def check_job_status(job_id):
     headers = {"x-api-key": B_KEY}
-    max_attempts = 30  # Limit the number of attempts
+    max_attempts = 30
 
     for _ in range(max_attempts):
         response = requests.get(f"{API_URL}/{job_id}", headers=headers)
@@ -107,31 +136,27 @@ def check_job_status(job_id):
     return None
 
 def get_media_duration(file_path):
-    # Fetch media duration using ffprobe
     cmd = ['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', file_path]
     result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     return float(result.stdout.strip())
 
 def combine_audio_video(video_path, audio_path, output_path):
-    # Get durations of both video and audio
     video_duration = get_media_duration(video_path)
     audio_duration = get_media_duration(audio_path)
 
     if video_duration > audio_duration:
-        # Trim video to match the audio length
         cmd = [
             'ffmpeg', '-i', video_path, '-i', audio_path,
-            '-t', str(audio_duration),  # Trim video to audio duration
+            '-t', str(audio_duration),
             '-map', '0:v', '-map', '1:a',
             '-c:v', 'copy', '-c:a', 'aac',
             '-y', output_path
         ]
     else:
-        # Loop video if it's shorter than audio
-        loop_count = int(audio_duration // video_duration) + 1  # Calculate how many times to loop
+        loop_count = int(audio_duration // video_duration) + 1
         cmd = [
             'ffmpeg', '-stream_loop', str(loop_count), '-i', video_path, '-i', audio_path,
-            '-t', str(audio_duration),  # Match the duration of the final video with the audio
+            '-t', str(audio_duration),
             '-map', '0:v', '-map', '1:a',
             '-c:v', 'copy', '-c:a', 'aac',
             '-shortest', '-y', output_path
@@ -139,12 +164,20 @@ def combine_audio_video(video_path, audio_path, output_path):
 
     subprocess.run(cmd, check=True)
 
-def process_video(voice, model, text, progress=gr.Progress()):
-    session_id = str(uuid.uuid4())  # Generate a unique session ID
-    progress(0, desc="Generating speech...")
-    audio_path = text_to_speech(voice, text, session_id)
-    if not audio_path:
-        return None, "Failed to generate speech audio."
+def process_video(voice, model, text, audio_file, progress=gr.Progress()):
+    session_id = str(uuid.uuid4())
+
+    # Handle audio input (either text-to-speech or uploaded file)
+    if audio_file is not None:
+        progress(0.1, desc="Processing uploaded audio...")
+        audio_path = process_uploaded_audio(audio_file, session_id)
+        if not audio_path:
+            return None, "Failed to process uploaded audio file."
+    else:
+        progress(0.1, desc="Generating speech...")
+        audio_path = text_to_speech(voice, text, session_id)
+        if not audio_path:
+            return None, "Failed to generate speech audio."
 
     progress(0.2, desc="Processing video...")
     video_path = os.path.join("models", model)
@@ -201,23 +234,40 @@ def create_interface():
         gr.Markdown("# JSON Train")
         with gr.Row():
             with gr.Column():
-                voice_dropdown = gr.Dropdown(choices=[v[0] for v in voices], label="Select", value=voices[0][0] if voices else None)
-                model_dropdown = gr.Dropdown(choices=models, label="Select", value=models[0] if models else None)
-                text_input = gr.Textbox(label="Enter text", lines=3)
+                with gr.Tab("Text to Speech"):
+                    voice_dropdown = gr.Dropdown(
+                        choices=[v[0] for v in voices],
+                        label="Select Voice",
+                        value=voices[0][0] if voices else None
+                    )
+                    text_input = gr.Textbox(label="Enter text", lines=3)
+
+                with gr.Tab("Upload Audio"):
+                    audio_input = gr.File(
+                        label="Upload Audio File",
+                        file_types=["audio/*"]
+                    )
+
+                model_dropdown = gr.Dropdown(
+                    choices=models,
+                    label="Select Video Model",
+                    value=models[0] if models else None
+                )
                 generate_btn = gr.Button("Generate Video")
+
             with gr.Column():
                 video_output = gr.Video(label="Generated Video")
                 status_output = gr.Textbox(label="Status", interactive=False)
 
-        def on_generate(voice_name, model_name, text):
+        def on_generate(voice_name, model_name, text, audio_file):
             voice_id = next((v[1] for v in voices if v[0] == voice_name), None)
             if not voice_id:
                 return None, "Invalid voice selected."
-            return process_video(voice_id, model_name, text)
+            return process_video(voice_id, model_name, text, audio_file)
 
         generate_btn.click(
             fn=on_generate,
-            inputs=[voice_dropdown, model_dropdown, text_input],
+            inputs=[voice_dropdown, model_dropdown, text_input, audio_input],
             outputs=[video_output, status_output]
         )
 
@@ -225,4 +275,4 @@ def create_interface():
 
 if __name__ == "__main__":
     app = create_interface()
-    app.launch()
+    app.launch()
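
Note: a minimal sketch of how the new upload path could be exercised outside the Gradio UI, assuming app.py is importable, ffmpeg is on PATH, and the input behaves like the file object the new helper expects (exposing .name and .read()); sample_voice.wav is a hypothetical local test file, not part of this commit.

    import uuid

    # Assumption: app.py sits in the working directory and its module-level
    # config (API keys, etc.) can be loaded without launching the UI.
    from app import process_uploaded_audio

    if __name__ == "__main__":
        session_id = str(uuid.uuid4())
        # A plain binary file object provides .name and .read(), matching how
        # process_uploaded_audio() consumes the Gradio upload.
        with open("sample_voice.wav", "rb") as f:  # hypothetical test file
            audio_path = process_uploaded_audio(f, session_id)
        # Non-mp3 input should come back converted, e.g. temp_voice_<session_id>.mp3
        print(audio_path)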