Update app.py
Browse files
app.py
CHANGED
@@ -18,6 +18,11 @@ B_KEY = os.getenv("B_KEY")
|
|
18 |
API_URL = os.getenv("API_URL")
|
19 |
UPLOAD_URL = os.getenv("UPLOAD_URL")
|
20 |
|
|
|
|
|
|
|
|
|
|
|
21 |
def get_voices():
|
22 |
url = "https://api.elevenlabs.io/v1/voices"
|
23 |
headers = {
|
@@ -55,40 +60,38 @@ def text_to_speech(voice_id, text, session_id):
|
|
55 |
if response.status_code != 200:
|
56 |
return None
|
57 |
|
58 |
-
|
59 |
-
audio_file_path = f'temp_voice_{session_id}.mp3'
|
60 |
with open(audio_file_path, 'wb') as audio_file:
|
61 |
audio_file.write(response.content)
|
62 |
return audio_file_path
|
63 |
|
64 |
-
def process_uploaded_audio(
|
65 |
"""Process and validate uploaded audio file"""
|
66 |
-
if
|
67 |
return None
|
68 |
|
69 |
# Get the file extension
|
70 |
-
ext = os.path.splitext(
|
71 |
if ext not in ['.mp3', '.wav', '.m4a', '.aac']:
|
72 |
return None
|
73 |
|
74 |
-
#
|
75 |
-
|
76 |
-
with open(audio_file_path, 'wb') as f:
|
77 |
-
f.write(audio_file.read())
|
78 |
|
79 |
# Convert to mp3 if not already mp3
|
80 |
if ext != '.mp3':
|
81 |
-
mp3_path = f'temp_voice_{session_id}.mp3'
|
82 |
cmd = [
|
83 |
-
'ffmpeg', '-i',
|
84 |
'-codec:a', 'libmp3lame', '-qscale:a', '2',
|
85 |
-
'-y',
|
86 |
]
|
87 |
subprocess.run(cmd, check=True)
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
|
|
|
|
92 |
|
93 |
def upload_file(file_path):
|
94 |
with open(file_path, 'rb') as file:
|
@@ -167,64 +170,71 @@ def combine_audio_video(video_path, audio_path, output_path):
|
|
167 |
def process_video(voice, model, text, audio_file, progress=gr.Progress()):
|
168 |
session_id = str(uuid.uuid4())
|
169 |
|
170 |
-
# Handle audio input (either text-to-speech or uploaded file)
|
171 |
-
if audio_file is not None:
|
172 |
-
progress(0.1, desc="Processing uploaded audio...")
|
173 |
-
audio_path = process_uploaded_audio(audio_file, session_id)
|
174 |
-
if not audio_path:
|
175 |
-
return None, "Failed to process uploaded audio file."
|
176 |
-
else:
|
177 |
-
progress(0.1, desc="Generating speech...")
|
178 |
-
audio_path = text_to_speech(voice, text, session_id)
|
179 |
-
if not audio_path:
|
180 |
-
return None, "Failed to generate speech audio."
|
181 |
-
|
182 |
-
progress(0.2, desc="Processing video...")
|
183 |
-
video_path = os.path.join("models", model)
|
184 |
-
|
185 |
try:
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
job_id = job_data["id"]
|
200 |
|
201 |
-
progress(0.
|
202 |
-
|
203 |
|
204 |
-
if result_url:
|
205 |
-
progress(0.9, desc="Downloading result...")
|
206 |
-
response = requests.get(result_url)
|
207 |
-
output_path = f"output_{session_id}.mp4"
|
208 |
-
with open(output_path, "wb") as f:
|
209 |
-
f.write(response.content)
|
210 |
-
progress(1.0, desc="Complete!")
|
211 |
-
return output_path, "Lipsync completed successfully!"
|
212 |
-
else:
|
213 |
-
raise Exception("Lipsync processing failed or timed out")
|
214 |
-
|
215 |
-
except Exception as e:
|
216 |
-
progress(0.8, desc="Falling back to simple combination...")
|
217 |
try:
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
224 |
finally:
|
225 |
-
# Cleanup
|
226 |
-
|
227 |
-
|
|
|
|
|
|
|
|
|
228 |
|
229 |
def create_interface():
|
230 |
voices = get_voices()
|
@@ -232,9 +242,16 @@ def create_interface():
|
|
232 |
|
233 |
with gr.Blocks() as app:
|
234 |
gr.Markdown("# JSON Train")
|
|
|
235 |
with gr.Row():
|
236 |
with gr.Column():
|
237 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
238 |
voice_dropdown = gr.Dropdown(
|
239 |
choices=[v[0] for v in voices],
|
240 |
label="Select Voice",
|
@@ -242,10 +259,11 @@ def create_interface():
|
|
242 |
)
|
243 |
text_input = gr.Textbox(label="Enter text", lines=3)
|
244 |
|
245 |
-
with gr.
|
246 |
-
audio_input = gr.
|
247 |
-
label="Upload Audio
|
248 |
-
|
|
|
249 |
)
|
250 |
|
251 |
model_dropdown = gr.Dropdown(
|
@@ -259,15 +277,32 @@ def create_interface():
|
|
259 |
video_output = gr.Video(label="Generated Video")
|
260 |
status_output = gr.Textbox(label="Status", interactive=False)
|
261 |
|
262 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
263 |
voice_id = next((v[1] for v in voices if v[0] == voice_name), None)
|
264 |
-
if
|
265 |
-
|
266 |
-
|
|
|
|
|
|
|
|
|
|
|
267 |
|
268 |
generate_btn.click(
|
269 |
fn=on_generate,
|
270 |
-
inputs=[voice_dropdown, model_dropdown, text_input, audio_input],
|
271 |
outputs=[video_output, status_output]
|
272 |
)
|
273 |
|
|
|
18 |
# Service endpoints are supplied via environment variables so the app can be
# deployed without hard-coding URLs (both may be None if unset).
API_URL = os.getenv("API_URL")
UPLOAD_URL = os.getenv("UPLOAD_URL")

# Directory for per-session scratch files (generated speech, converted audio,
# rendered videos).  exist_ok=True makes creation idempotent and race-free,
# unlike the previous `if not os.path.exists(...)` check-then-create.
TEMP_DIR = "temp"
os.makedirs(TEMP_DIR, exist_ok=True)
26 |
def get_voices():
|
27 |
url = "https://api.elevenlabs.io/v1/voices"
|
28 |
headers = {
|
|
|
60 |
if response.status_code != 200:
|
61 |
return None
|
62 |
|
63 |
+
audio_file_path = os.path.join(TEMP_DIR, f'temp_voice_{session_id}.mp3')
|
|
|
64 |
with open(audio_file_path, 'wb') as audio_file:
|
65 |
audio_file.write(response.content)
|
66 |
return audio_file_path
|
67 |
|
68 |
+
def process_uploaded_audio(audio_path, session_id):
    """Validate an uploaded audio file and normalize it to MP3 in TEMP_DIR.

    Args:
        audio_path: Filesystem path of the uploaded audio file.
        session_id: Unique id used to name the per-session temp file.

    Returns:
        Path of the MP3 copy/transcode inside TEMP_DIR, or None if the input
        is missing, has an unsupported extension, or conversion fails.
    """
    if not audio_path:
        return None

    # Only accept a small whitelist of common audio extensions.
    ext = os.path.splitext(audio_path)[1].lower()
    if ext not in ('.mp3', '.wav', '.m4a', '.aac'):
        return None

    output_path = os.path.join(TEMP_DIR, f'temp_voice_{session_id}.mp3')

    if ext != '.mp3':
        # Transcode to MP3 with ffmpeg (argument list => shell=False, so the
        # user-supplied path cannot be shell-injected).
        cmd = [
            'ffmpeg', '-i', audio_path,
            '-codec:a', 'libmp3lame', '-qscale:a', '2',
            '-y', output_path
        ]
        try:
            subprocess.run(cmd, check=True)
        except (subprocess.CalledProcessError, FileNotFoundError):
            # ffmpeg missing or conversion failed: report failure through the
            # function's None contract instead of raising into the UI handler.
            return None
        return output_path

    # Already MP3: copy into the temp directory without loading the whole
    # file into memory (the original read()/write() did).
    import shutil
    shutil.copyfile(audio_path, output_path)
    return output_path
|
95 |
|
96 |
def upload_file(file_path):
|
97 |
with open(file_path, 'rb') as file:
|
|
|
170 |
def process_video(voice, model, text, audio_file, progress=gr.Progress()):
    """Generate a lip-synced video from a model video plus speech audio.

    Audio comes either from an uploaded file or from text-to-speech.  The
    remote lipsync API is tried first; on any failure we fall back to a
    simple audio/video mux so the user still gets a result.

    Returns:
        (output_path, status_message); output_path is None on failure.
    """
    session_id = str(uuid.uuid4())

    try:
        # Resolve the audio source: an uploaded file wins over TTS text.
        if audio_file is not None:
            progress(0.1, desc="Processing uploaded audio...")
            # gr.Audio(type="filepath") passes a plain path string; older
            # Gradio file objects expose the path via `.name`.  Accept both
            # (the original `audio_file.name` broke on plain strings).
            upload_path = audio_file if isinstance(audio_file, str) else audio_file.name
            audio_path = process_uploaded_audio(upload_path, session_id)
            if not audio_path:
                return None, "Failed to process uploaded audio file."
        elif text:
            progress(0.1, desc="Generating speech...")
            audio_path = text_to_speech(voice, text, session_id)
            if not audio_path:
                return None, "Failed to generate speech audio."
        else:
            return None, "Please either enter text or upload an audio file."

        progress(0.2, desc="Processing video...")
        video_path = os.path.join("models", model)

        try:
            progress(0.3, desc="Uploading files...")
            video_url = upload_file(video_path)
            audio_url = upload_file(audio_path)

            if not video_url or not audio_url:
                raise Exception("Failed to upload files")

            progress(0.4, desc="Initiating lipsync...")
            job_data = lipsync_api_call(video_url, audio_url)

            # The API reports problems via "error"/"message" keys rather
            # than through the job payload shape alone.
            if "error" in job_data or "message" in job_data:
                raise Exception(job_data.get("error", job_data.get("message", "Unknown error")))

            job_id = job_data["id"]

            progress(0.5, desc="Processing lipsync...")
            result_url = check_job_status(job_id)

            if result_url:
                progress(0.9, desc="Downloading result...")
                response = requests.get(result_url)
                output_path = os.path.join(TEMP_DIR, f"output_{session_id}.mp4")
                with open(output_path, "wb") as f:
                    f.write(response.content)
                progress(1.0, desc="Complete!")
                return output_path, "Lipsync completed successfully!"
            else:
                raise Exception("Lipsync processing failed or timed out")

        except Exception as e:
            # Remote lipsync failed: mux the audio over the original video
            # locally so the request still produces output.
            progress(0.8, desc="Falling back to simple combination...")
            try:
                output_path = os.path.join(TEMP_DIR, f"output_{session_id}.mp4")
                combine_audio_video(video_path, audio_path, output_path)
                progress(1.0, desc="Complete!")
                return output_path, f"Used fallback method. Original error: {str(e)}"
            except Exception as fallback_error:
                return None, f"All methods failed. Error: {str(fallback_error)}"
    finally:
        # Best-effort cleanup of this session's intermediate files.  The
        # output_*.mp4 we are returning must survive — the original code
        # deleted it here too, so Gradio could never serve the result.
        for temp_file in os.listdir(TEMP_DIR):
            if session_id in temp_file and not temp_file.startswith("output_"):
                try:
                    os.remove(os.path.join(TEMP_DIR, temp_file))
                except OSError:
                    # Only swallow filesystem errors; a bare `except:` would
                    # also hide KeyboardInterrupt/SystemExit.
                    pass
|
238 |
|
239 |
def create_interface():
|
240 |
voices = get_voices()
|
|
|
242 |
|
243 |
with gr.Blocks() as app:
|
244 |
gr.Markdown("# JSON Train")
|
245 |
+
|
246 |
with gr.Row():
|
247 |
with gr.Column():
|
248 |
+
input_type = gr.Radio(
|
249 |
+
choices=["Text to Speech", "Upload Audio"],
|
250 |
+
label="Input Type",
|
251 |
+
value="Text to Speech"
|
252 |
+
)
|
253 |
+
|
254 |
+
with gr.Group() as tts_group:
|
255 |
voice_dropdown = gr.Dropdown(
|
256 |
choices=[v[0] for v in voices],
|
257 |
label="Select Voice",
|
|
|
259 |
)
|
260 |
text_input = gr.Textbox(label="Enter text", lines=3)
|
261 |
|
262 |
+
with gr.Group() as audio_group:
|
263 |
+
audio_input = gr.Audio(
|
264 |
+
label="Upload Audio",
|
265 |
+
source="upload",
|
266 |
+
type="filepath"
|
267 |
)
|
268 |
|
269 |
model_dropdown = gr.Dropdown(
|
|
|
277 |
video_output = gr.Video(label="Generated Video")
|
278 |
status_output = gr.Textbox(label="Status", interactive=False)
|
279 |
|
280 |
+
def toggle_input_groups(choice):
    """Show the TTS controls or the upload controls, matching *choice*."""
    use_tts = choice == "Text to Speech"
    return (
        gr.Group.update(visible=use_tts),
        gr.Group.update(visible=not use_tts),
    )
|
285 |
+
|
286 |
+
input_type.change(
|
287 |
+
toggle_input_groups,
|
288 |
+
inputs=[input_type],
|
289 |
+
outputs=[tts_group, audio_group]
|
290 |
+
)
|
291 |
+
|
292 |
+
def on_generate(input_choice, voice_name, model_name, text, audio_file):
    """Dispatch the Generate click to process_video.

    Routes either the text (TTS) or the uploaded audio file depending on
    the selected input type, validating the relevant input first.

    Returns:
        (video_path_or_None, status_message) for the two output widgets.
    """
    # Map the human-readable voice name back to its API voice id.
    voice_id = next((v[1] for v in voices if v[0] == voice_name), None)

    if input_choice == "Text to Speech":
        if not text:
            return None, "Please enter some text."
        if voice_id is None:
            # Fail fast with a clear message instead of letting the TTS
            # call fail later with a generic "failed to generate" error.
            return None, "Please select a valid voice."
        return process_video(voice_id, model_name, text, None)
    else:
        if not audio_file:
            return None, "Please upload an audio file."
        return process_video(voice_id, model_name, None, audio_file)
|
302 |
|
303 |
generate_btn.click(
|
304 |
fn=on_generate,
|
305 |
+
inputs=[input_type, voice_dropdown, model_dropdown, text_input, audio_input],
|
306 |
outputs=[video_output, status_output]
|
307 |
)
|
308 |
|