Spaces:

sheikhed
/

json

Sleeping

App Files Files Community

sheikhed committed on Oct 11, 2024

Commit

b00f7c3

verified ·

1 Parent(s): 069e0f7

Update app.py

Browse files

Files changed (1) hide show

app.py +68 -15

app.py CHANGED Viewed

@@ -55,12 +55,23 @@ def text_to_speech(voice_id, text, session_id):
     if response.status_code != 200:
         return None
-    # Save temporary audio file with session ID
     audio_file_path = f'temp_voice_{session_id}.mp3'
     with open(audio_file_path, 'wb') as audio_file:
         audio_file.write(response.content)
     return audio_file_path
 def upload_file(file_path):
     with open(file_path, 'rb') as file:
         files = {'fileToUpload': (os.path.basename(file_path), file)}
@@ -92,7 +103,7 @@ def lipsync_api_call(video_url, audio_url):
 def check_job_status(job_id):
     headers = {"x-api-key": B_KEY}
-    max_attempts = 30  # Limit the number of attempts
     for _ in range(max_attempts):
         response = requests.get(f"{API_URL}/{job_id}", headers=headers)
@@ -115,12 +126,20 @@ def combine_audio_video(video_path, audio_path, output_path):
     ]
     subprocess.run(cmd, check=True)
-def process_video(voice, model, text, progress=gr.Progress()):
-    session_id = str(uuid.uuid4())  # Generate a unique session ID
-    progress(0, desc="Generating speech...")
-    audio_path = text_to_speech(voice, text, session_id)
-    if not audio_path:
-        return None, "Failed to generate speech audio."
     progress(0.2, desc="Processing video...")
     video_path = os.path.join("models", model)
@@ -177,23 +196,57 @@ def create_interface():
         gr.Markdown("# JSON Train")
         with gr.Row():
             with gr.Column():
-                voice_dropdown = gr.Dropdown(choices=[v[0] for v in voices], label="Select", value=voices[0][0] if voices else None)
-                model_dropdown = gr.Dropdown(choices=models, label="Select", value=models[0] if models else None)
-                text_input = gr.Textbox(label="json", lines=3)
                 generate_btn = gr.Button("Generate Video")
             with gr.Column():
                 video_output = gr.Video(label="Generated Video")
                 status_output = gr.Textbox(label="Status", interactive=False)
-        def on_generate(voice_name, model_name, text):
             voice_id = next((v[1] for v in voices if v[0] == voice_name), None)
-            if not voice_id:
                 return None, "Invalid voice selected."
-            return process_video(voice_id, model_name, text)
         generate_btn.click(
             fn=on_generate,
-            inputs=[voice_dropdown, model_dropdown, text_input],
             outputs=[video_output, status_output]
         )

     if response.status_code != 200:
         return None
     audio_file_path = f'temp_voice_{session_id}.mp3'
     with open(audio_file_path, 'wb') as audio_file:
         audio_file.write(response.content)
     return audio_file_path
+def handle_uploaded_audio(audio_file, session_id):
+    """Stage an uploaded audio clip as this session's temporary voice file.
+
+    Args:
+        audio_file: Path to the uploaded file (the audio input is declared
+            with ``type="filepath"``), raw bytes, a binary file-like object,
+            or None when nothing was uploaded.
+        session_id: Unique id used to build the staged file name.
+
+    Returns:
+        Path of the staged copy, or None when there is no upload or it
+        could not be read or written.
+    """
+    if audio_file is None:
+        return None
+    import shutil  # local import: the file's import block is not visible here
+    # NOTE(review): the staged name always ends in .mp3, whatever the upload's
+    # real format is -- confirm later steps do not rely on the extension.
+    temp_path = f'temp_voice_{session_id}.mp3'
+    try:
+        if isinstance(audio_file, (str, os.PathLike)):
+            # Copy rather than os.rename(): rename raises OSError when source
+            # and destination are on different filesystems, and it removes
+            # the upload from the location it was stored in.
+            shutil.copyfile(audio_file, temp_path)
+            return temp_path
+        # A file-like object has to be read first; writing the object itself
+        # would raise TypeError.
+        data = audio_file.read() if hasattr(audio_file, 'read') else audio_file
+        if not isinstance(data, (bytes, bytearray, memoryview)):
+            return None
+        with open(temp_path, 'wb') as f:
+            f.write(data)
+    except OSError:
+        # Signal failure with None, which the caller turns into a status
+        # message, instead of letting the exception escape.
+        return None
+    return temp_path
 def upload_file(file_path):
     with open(file_path, 'rb') as file:
         files = {'fileToUpload': (os.path.basename(file_path), file)}
 def check_job_status(job_id):
     headers = {"x-api-key": B_KEY}
+    max_attempts = 30
     for _ in range(max_attempts):
         response = requests.get(f"{API_URL}/{job_id}", headers=headers)
     ]
     subprocess.run(cmd, check=True)
+def process_video(voice, model, text, audio_file, input_type, progress=gr.Progress()):
+    session_id = str(uuid.uuid4())
+    # Handle audio based on input type
+    if input_type == "text":
+        progress(0, desc="Generating speech...")
+        audio_path = text_to_speech(voice, text, session_id)
+        if not audio_path:
+            return None, "Failed to generate speech audio."
+    else:  # audio upload
+        progress(0, desc="Processing uploaded audio...")
+        audio_path = handle_uploaded_audio(audio_file, session_id)
+        if not audio_path:
+            return None, "Failed to process uploaded audio."
     progress(0.2, desc="Processing video...")
     video_path = os.path.join("models", model)
         gr.Markdown("# JSON Train")
         with gr.Row():
             with gr.Column():
+                input_type = gr.Radio(
+                    choices=["text", "audio"],
+                    label="Input Type",
+                    value="text"
+                )
+                # Text-to-speech components
+                with gr.Group() as text_group:
+                    voice_dropdown = gr.Dropdown(
+                        choices=[v[0] for v in voices],
+                        label="Select Voice",
+                        value=voices[0][0] if voices else None
+                    )
+                    text_input = gr.Textbox(label="Enter text", lines=3)
+                # Audio upload component
+                with gr.Group() as audio_group:
+                    audio_input = gr.Audio(label="Upload Audio File", type="filepath")
+                model_dropdown = gr.Dropdown(
+                    choices=models,
+                    label="Select Video Model",
+                    value=models[0] if models else None
+                )
                 generate_btn = gr.Button("Generate Video")
             with gr.Column():
                 video_output = gr.Video(label="Generated Video")
                 status_output = gr.Textbox(label="Status", interactive=False)
+        def toggle_input_groups(choice):
+            """Show only the input group that matches the selected input type.
+
+            Args:
+                choice: Current value of the input-type radio, "text" or
+                    "audio".
+
+            Returns:
+                A pair of visibility updates, in the order
+                (text group, audio group).
+            """
+            # gr.update() rather than gr.Group.update(): the per-component
+            # update() classmethods were removed in Gradio 4, while the
+            # generic gr.update() helper is available in both 3.x and 4.x.
+            return (
+                gr.update(visible=(choice == "text")),
+                gr.update(visible=(choice == "audio")),
+            )
+        input_type.change(
+            fn=toggle_input_groups,
+            inputs=[input_type],
+            outputs=[text_group, audio_group]
+        )
+        def on_generate(input_type, voice_name, text, audio_file, model_name):
+            """Validate the form values and start video generation.
+
+            Args:
+                input_type: "text" to synthesize speech from ``text``;
+                    any other value uses the uploaded ``audio_file``.
+                voice_name: Display name of the selected voice.
+                text: Text to speak (used only for "text" input).
+                audio_file: Uploaded audio (used only for audio input).
+                model_name: Selected video model.
+
+            Returns:
+                ``(video, status)`` as returned by ``process_video``, or
+                ``(None, message)`` when validation fails.
+            """
+            # The model dropdown defaults to None when no models are listed;
+            # passing None on would fail when the model path is built.
+            if not model_name:
+                return None, "No video model selected."
             voice_id = next((v[1] for v in voices if v[0] == voice_name), None)
+            if input_type == "text" and not voice_id:
                 return None, "Invalid voice selected."
+            # Stop here on blank text so no speech request is made for
+            # nothing to synthesize.
+            if input_type == "text" and not (text or "").strip():
+                return None, "Please enter some text."
+            return process_video(voice_id, model_name, text, audio_file, input_type)
         generate_btn.click(
             fn=on_generate,
+            inputs=[input_type, voice_dropdown, text_input, audio_input, model_dropdown],
             outputs=[video_output, status_output]
         )