Spaces:

sheikhed
/

json

Sleeping

App Files Files Community

sheikhed committed on Oct 11, 2024

Commit

f959be9

verified ·

1 Parent(s): 14d37e3

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -39

app.py CHANGED Viewed

@@ -55,6 +55,7 @@ def text_to_speech(voice_id, text, session_id):
     if response.status_code != 200:
         return None
     audio_file_path = f'temp_voice_{session_id}.mp3'
     with open(audio_file_path, 'wb') as audio_file:
         audio_file.write(response.content)
@@ -91,7 +92,7 @@ def lipsync_api_call(video_url, audio_url):
 def check_job_status(job_id):
     headers = {"x-api-key": B_KEY}
-    max_attempts = 30
     for _ in range(max_attempts):
         response = requests.get(f"{API_URL}/{job_id}", headers=headers)
@@ -106,27 +107,31 @@ def check_job_status(job_id):
     return None
 def get_media_duration(file_path):
     cmd = ['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', file_path]
     result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     return float(result.stdout.strip())
 def combine_audio_video(video_path, audio_path, output_path):
     video_duration = get_media_duration(video_path)
     audio_duration = get_media_duration(audio_path)
     if video_duration > audio_duration:
         cmd = [
             'ffmpeg', '-i', video_path, '-i', audio_path,
-            '-t', str(audio_duration),
             '-map', '0:v', '-map', '1:a',
             '-c:v', 'copy', '-c:a', 'aac',
             '-y', output_path
         ]
     else:
-        loop_count = int(audio_duration // video_duration) + 1
         cmd = [
             'ffmpeg', '-stream_loop', str(loop_count), '-i', video_path, '-i', audio_path,
-            '-t', str(audio_duration),
             '-map', '0:v', '-map', '1:a',
             '-c:v', 'copy', '-c:a', 'aac',
             '-shortest', '-y', output_path
@@ -134,32 +139,20 @@ def combine_audio_video(video_path, audio_path, output_path):
     subprocess.run(cmd, check=True)
-def process_input(voice, model, text, audio_file, progress=gr.Progress()):
-    session_id = str(uuid.uuid4())
-    input_audio_path = None
-    if text and audio_file:
-        return None, "Please choose either text input or audio upload, not both."
-    if text:
-        progress(0, desc="Generating speech...")
-        input_audio_path = text_to_speech(voice, text, session_id)
-        if not input_audio_path:
-            return None, "Failed to generate speech audio."
-    elif audio_file:
-        progress(0, desc="Using uploaded audio...")
-        input_audio_path = audio_file  # Use the file path directly
-    else:
-        return None, "Please provide either text or upload an audio file."
     progress(0.2, desc="Processing video...")
     video_path = os.path.join("models", model)
     try:
         progress(0.3, desc="Uploading files...")
         video_url = upload_file(video_path)
-        audio_url = upload_file(input_audio_path)
         if not video_url or not audio_url:
             raise Exception("Failed to upload files")
@@ -190,14 +183,15 @@ def process_input(voice, model, text, audio_file, progress=gr.Progress()):
         progress(0.8, desc="Falling back to simple combination...")
         try:
             output_path = f"output_{session_id}.mp4"
-            combine_audio_video(video_path, input_audio_path, output_path)
             progress(1.0, desc="Complete!")
             return output_path, f"Used fallback method. Original error: {str(e)}"
         except Exception as fallback_error:
             return None, f"All methods failed. Error: {str(fallback_error)}"
     finally:
-        if text and os.path.exists(input_audio_path):
-            os.remove(input_audio_path)
 def create_interface():
     voices = get_voices()
@@ -205,30 +199,25 @@ def create_interface():
     with gr.Blocks() as app:
         gr.Markdown("# JSON Train")
         with gr.Row():
             with gr.Column():
-                voice_dropdown = gr.Dropdown(choices=[v[0] for v in voices], label="Select Voice", value=voices[0][0] if voices else None)
-                model_dropdown = gr.Dropdown(choices=models, label="Select Model", value=models[0] if models else None)
-                text_input = gr.Textbox(label="Enter Text", lines=3)
-                audio_input = gr.Audio(label="Upload Audio", type="filepath")
                 generate_btn = gr.Button("Generate Video")
             with gr.Column():
                 video_output = gr.Video(label="Generated Video")
                 status_output = gr.Textbox(label="Status", interactive=False)
-        def on_generate(voice_name, model_name, text, audio_file):
             voice_id = next((v[1] for v in voices if v[0] == voice_name), None)
             if not voice_id:
                 return None, "Invalid voice selected."
-            return process_input(voice_id, model_name, text, audio_file)
         generate_btn.click(
             fn=on_generate,
-            inputs=[voice_dropdown, model_dropdown, text_input, audio_input],
             outputs=[video_output, status_output]
         )
@@ -236,4 +225,4 @@ def create_interface():
 if __name__ == "__main__":
     app = create_interface()
-    app.launch()

     if response.status_code != 200:
         return None
+    # Save temporary audio file with session ID
     audio_file_path = f'temp_voice_{session_id}.mp3'
     with open(audio_file_path, 'wb') as audio_file:
         audio_file.write(response.content)
 def check_job_status(job_id):
     headers = {"x-api-key": B_KEY}
+    max_attempts = 30  # Limit the number of attempts
     for _ in range(max_attempts):
         response = requests.get(f"{API_URL}/{job_id}", headers=headers)
     return None
 def get_media_duration(file_path):
+    # Fetch media duration using ffprobe
     cmd = ['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', file_path]
     result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     return float(result.stdout.strip())
 def combine_audio_video(video_path, audio_path, output_path):
+    # Get durations of both video and audio
     video_duration = get_media_duration(video_path)
     audio_duration = get_media_duration(audio_path)
     if video_duration > audio_duration:
+        # Trim video to match the audio length
         cmd = [
             'ffmpeg', '-i', video_path, '-i', audio_path,
+            '-t', str(audio_duration),  # Trim video to audio duration
             '-map', '0:v', '-map', '1:a',
             '-c:v', 'copy', '-c:a', 'aac',
             '-y', output_path
         ]
     else:
+        # Loop video if it's shorter than audio
+        loop_count = int(audio_duration // video_duration) + 1  # Calculate how many times to loop
         cmd = [
             'ffmpeg', '-stream_loop', str(loop_count), '-i', video_path, '-i', audio_path,
+            '-t', str(audio_duration),  # Match the duration of the final video with the audio
             '-map', '0:v', '-map', '1:a',
             '-c:v', 'copy', '-c:a', 'aac',
             '-shortest', '-y', output_path
     subprocess.run(cmd, check=True)
+def process_video(voice, model, text, progress=gr.Progress()):
+    session_id = str(uuid.uuid4())  # Generate a unique session ID
+    progress(0, desc="Generating speech...")
+    audio_path = text_to_speech(voice, text, session_id)
+    if not audio_path:
+        return None, "Failed to generate speech audio."
     progress(0.2, desc="Processing video...")
     video_path = os.path.join("models", model)
     try:
         progress(0.3, desc="Uploading files...")
         video_url = upload_file(video_path)
+        audio_url = upload_file(audio_path)
         if not video_url or not audio_url:
             raise Exception("Failed to upload files")
         progress(0.8, desc="Falling back to simple combination...")
         try:
             output_path = f"output_{session_id}.mp4"
+            combine_audio_video(video_path, audio_path, output_path)
             progress(1.0, desc="Complete!")
             return output_path, f"Used fallback method. Original error: {str(e)}"
         except Exception as fallback_error:
             return None, f"All methods failed. Error: {str(fallback_error)}"
     finally:
+        # Cleanup
+        if os.path.exists(audio_path):
+            os.remove(audio_path)
 def create_interface():
     voices = get_voices()
     with gr.Blocks() as app:
         gr.Markdown("# JSON Train")
         with gr.Row():
             with gr.Column():
+                voice_dropdown = gr.Dropdown(choices=[v[0] for v in voices], label="Select", value=voices[0][0] if voices else None)
+                model_dropdown = gr.Dropdown(choices=models, label="Select", value=models[0] if models else None)
+                text_input = gr.Textbox(label="Enter text", lines=3)
                 generate_btn = gr.Button("Generate Video")
             with gr.Column():
                 video_output = gr.Video(label="Generated Video")
                 status_output = gr.Textbox(label="Status", interactive=False)
+        def on_generate(voice_name, model_name, text):
             voice_id = next((v[1] for v in voices if v[0] == voice_name), None)
             if not voice_id:
                 return None, "Invalid voice selected."
+            return process_video(voice_id, model_name, text)
         generate_btn.click(
             fn=on_generate,
+            inputs=[voice_dropdown, model_dropdown, text_input],
             outputs=[video_output, status_output]
         )
 if __name__ == "__main__":
     app = create_interface()
+    app.launch()