# Source: Hugging Face Space "Maximofn/subtify" (Space status when captured: build error).
# File size: 26,879 bytes.

import gradio as gr
import argparse
import spaces
import os
import torch
import shutil
from time import sleep
from tqdm import tqdm
from lang_list import union_language_dict
# import pyperclip
import re
from PIL import Image
# import urllib.request
from ui_config import (
    BACKGROUND_COLOR, BUTTON_COLOR, SVG_COLOR, PANEL_COLOR,
    PRIMARY_TEXT_COLOR, SUBDUED_TEXT_COLOR, BACKGROUND_PRIMARY_COLOR,
    BACKGROUND_SECONDARY_COLOR, PRIMARY_BODER_COLOR, BLOCK_TITLE_TEXT_COLOR,
    INPUT_BACKGROUND_COLOR, INPUT_BORDER_COLOR, INPUT_PLACEHOLDER_COLOR,
    ERROR_BACKGROUND_COLOR, ERROR_TEXT_COLOR, ERROR_BORDER_COLOR,
    BUTTON_SECONDARY_BACKGROUND_COLOR, BUTTON_SECONDARY_BORDER_COLOR,
    BUTTON_SECONDARY_TEXT_COLOR, RED, GREEN, BLUE,
    html_social_media, get_html_subtify_logo_big, get_html_subtify_logo_small, html_buy_me_a_coffe
)
# from url_manager import get_youtube_thumbnail, is_valid_youtube_url, is_valid_twitch_url, is_valid_url
from slice_audio import slice_audio as slice_audio_main
from audio import get_audio_from_video
from transcribe import transcribe, get_language_dict
from diarize_library import diarize_audio
import json

# Width, in characters, of the '*' separator line printed before each step.
NUMBER = 100
# Run on the GPU when torch reports one, otherwise on the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# DEVICE = "cpu"

# Per-stage switches. Nothing in the visible part of this file reads them.
DOWNLOAD = True
SLICE_AUDIO = True
TRANSCRIBE_AUDIO = True
CONCATENATE_TRANSCRIPTIONS = True
TRANSLATE_TRANSCRIPTIONS = True
ADD_SUBTITLES_TO_VIDEO = True
REMOVE_FILES = True

# Chunk length and overlap, in seconds, forwarded to the slicer, the
# transcriber and the concatenation script. Both devices use the same values;
# the previous RAM-derived chunk length was overwritten immediately after
# being computed, so it has been dropped.
CHUNK_SECONDS = 30
CHUNK_OVERLAP_SECONDS = 5
if DEVICE == "cpu":
    # Assuming we are on huggingface server
    ram = 16000
    print(f"RAM: {ram}, CHUNK_SECONDS: {CHUNK_SECONDS}, CHUNK_OVERLAP_SECONDS: {CHUNK_OVERLAP_SECONDS}")

# Source/status tags. Nothing in the visible part of this file reads them.
YOUTUBE = "youtube"
TWITCH = "twitch"
ERROR = "error"

# VIEW_OUTPUTS controls whether the raw transcription / diarization / merged
# textboxes are visible in the UI.
VIEW_OUTPUTS = True
# DEBUG makes the pipeline steps reuse result files left on disk by earlier
# runs instead of recomputing them.
DEBUG = True

# Open the logo to read its pixel size; the size divided by `factor` is what
# gets passed to the HTML helpers below.
# NOTE(review): the image object is never closed in the visible code.
subtify_logo = Image.open("assets/subtify_logo-scaled.png")
subtify_logo_width, subtify_logo_height = subtify_logo.size
# Integer divisor applied to both dimensions.
factor = 4
new_width = subtify_logo_width // factor
new_height = subtify_logo_height // factor

# Big and small logo HTML, both built from the same reduced dimensions.
html_subtify_logo_big = get_html_subtify_logo_big(new_width, new_height)
html_subtify_logo_small = get_html_subtify_logo_small(new_width, new_height)

# Used as the `choices` of the source/target language dropdowns in subtify().
language_dict = union_language_dict()

def remove_all_files():
    """Remove all temporary files and folders.

    Deletes every working folder the pipeline creates under the current
    working directory. Folders that do not exist are skipped. A failure to
    delete one entry is reported on stdout and does not stop the others from
    being removed (best effort, like the shell ``rm -r`` calls this replaces).

    NOTE(review): the "diarization" and "merged_transcription_diarization"
    folders used by other steps in this file are not in the list; confirm
    whether they should be cleaned up too.
    """
    temporary_folders = (
        "audios",
        "chunks",
        "concatenated_transcriptions",
        "transcriptions",
        "translated_transcriptions",
        "videos",
        "vocals",
    )
    for folder in temporary_folders:
        if not os.path.exists(folder):
            continue
        try:
            if os.path.isdir(folder) and not os.path.islink(folder):
                shutil.rmtree(folder)
            else:
                # A plain file or a symlink sitting where the folder should be.
                os.remove(folder)
        except OSError as e:
            print(f"Error removing {folder}: {e}")

def reset_frontend():
    """Reset all frontend elements to their default state.

    Returns one value per component wired as an output of
    ``delete_button.click`` in ``subtify()``, in the same order: the video
    input is cleared and every other component is hidden.

    The previous version returned 17 component instances for 12 outputs, and
    their types did not line up with the outputs (for example an Image for a
    Dropdown). Only the ``visible=False`` part of the first 12 values could
    take effect, which is exactly what is returned here.

    NOTE(review): no handler in the visible file makes the language
    dropdowns, the advanced-settings accordion, the speaker count or the
    subtify button visible again after this reset; confirm that is intended.

    Returns:
        tuple: ``None`` for the video input followed by 11 visibility updates.
    """
    return (
        None,                      # video_input
        gr.update(visible=False),  # source_languaje
        gr.update(visible=False),  # target_languaje
        gr.update(visible=False),  # Advanced_setings
        gr.update(visible=False),  # number_of_speakers
        gr.update(visible=False),  # subtify_button
        gr.update(visible=False),  # auxiliar_block1
        gr.update(visible=False),  # video_transcribed_progress_info
        gr.update(visible=False),  # transcriptions_concatenated_progress_info
        gr.update(visible=False),  # video_translated_progress_info
        gr.update(visible=False),  # video_subtitled_progress_info
        gr.update(visible=False),  # subtitled_video
    )

def show_auxiliar_block1():
    """Return a hidden textbox whose value is the "URL checked" marker."""
    textbox_options = {"value": "URL checked", "visible": False}
    return gr.Textbox(**textbox_options)

def change_visibility_texboxes():
    """Show the progress textboxes and set the auxiliary block to "Done".

    Returns one update per component wired as an output of
    ``subtify_button.click`` in ``subtify()``, in the same order. The previous
    version returned a ninth update for a ``video_sliced_progress_info``
    component that is not among those outputs.

    Returns:
        tuple: The auxiliary-block value update followed by 7 visibility
        updates.
    """
    return (
        gr.update(value="Done"),   # auxiliar_block1
        gr.update(visible=True),   # get_audio_from_video_info
        gr.update(visible=True),   # merged_transcription_progress_info
        gr.update(visible=True),   # video_transcribed_progress_info
        gr.update(visible=True),   # diarization_progress_info
        gr.update(visible=True),   # transcriptions_concatenated_progress_info
        gr.update(visible=True),   # video_translated_progress_info
        gr.update(visible=True),   # video_subtitled_progress_info
    )

def get_audio(video_path):
    """
    Produce the audio track for a video and report the outcome.

    With DEBUG enabled, an existing "audios/download_audio.mp3" is reused and
    the extraction is skipped. Any exception raised by the extraction is
    printed and reported as an "Error" status with an empty path.

    Args:
        video_path (str): Path to video file

    Returns:
        list: Update for get_audio_from_video_info ("Loaded", "Ok" or
        "Error") and update for original_audio_path
    """
    print('*'*NUMBER)
    print(f"Getting audio from video {video_path}")

    audios_folder = "audios"

    # Debug shortcut: reuse the audio left behind by a previous run.
    cached_audio = f"{audios_folder}/download_audio.mp3"
    if DEBUG and os.path.exists(cached_audio):
        return [
            gr.update(value="Loaded"),  # get_audio_from_video_info
            gr.update(value=cached_audio)  # original_audio_path
        ]

    try:
        status, audio_value = "Ok", get_audio_from_video(video_path, audios_folder)
    except Exception as e:
        print(f"Error: {str(e)}")
        status, audio_value = "Error", ""

    return [
        gr.update(value=status),  # get_audio_from_video_info
        gr.update(value=audio_value)  # original_audio_path
    ]

def slice_audio(input_audio_path):
    """
    Cut an audio file into overlapping chunks stored in the "chunks" folder.

    Chunk length and overlap come from CHUNK_SECONDS and
    CHUNK_OVERLAP_SECONDS. The "vocals" and "chunks" folders are created when
    missing.

    Args:
        input_audio_path (str): Path to input audio file

    Returns:
        tuple: Single "Ok" update for video_sliced_progress_info
    """
    print('*'*NUMBER)
    print(f"Slicing audio {input_audio_path} in chunks of {CHUNK_SECONDS} seconds with {CHUNK_OVERLAP_SECONDS} seconds overlap")

    # Make sure both working folders exist before slicing
    print("Creating vocals and chunks folders")
    chunks_dir = "chunks"
    for directory in ("vocals", chunks_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    slice_audio_main(input_audio_path, chunks_dir, CHUNK_SECONDS, CHUNK_OVERLAP_SECONDS)

    sliced_status = gr.update(value="Ok")  # video_sliced_progress_info
    return (sliced_status,)

def diarize(input_audio_path, num_speakers, min_speakers, max_speakers):
    """
    Perform speaker diarization on audio file.

    With DEBUG enabled, an existing "diarization/diarization.json" is reused:
    its raw text is returned and the diarization is not recomputed.
    Otherwise the audio is diarized on DEVICE and the result is saved to that
    file as JSON. The "diarization" folder is created first when missing;
    previously the save failed on a first run because it was never created.

    Args:
        input_audio_path (str): Path to audio file
        num_speakers (int): Expected number of speakers
        min_speakers (int): Minimum number of speakers
        max_speakers (int): Maximum number of speakers

    Returns:
        list: Update for diarization_progress_info ("Loaded" or "Ok") and
        update carrying the diarization (file text when loaded, the object
        returned by diarize_audio otherwise)
    """
    print('*'*NUMBER)
    print(f"Diarize {input_audio_path}")

    # Diarization file
    diarization_file = "diarization/diarization.json"

    if DEBUG and os.path.exists(diarization_file):
        with open(diarization_file, "r") as f:
            diarization = f.read()
        return [
            gr.update(value="Loaded"),
            gr.update(value=diarization)
        ]

    # Diarize audio
    diarization = diarize_audio(input_audio_path, num_speakers, min_speakers, max_speakers, DEVICE)

    # Save diarization; the target folder may not exist yet
    os.makedirs(os.path.dirname(diarization_file), exist_ok=True)
    with open(diarization_file, "w") as f:
        json.dump(diarization, f)

    return [
        gr.update(value="Ok"),
        gr.update(value=diarization)
    ]

def trascribe_audio(input_audio_path, source_languaje):
    """
    Transcribe an audio file and store the result as JSON.

    With DEBUG enabled, an existing
    "transcriptions/transcription_<source_languaje>.json" is loaded and
    returned without transcribing again. Otherwise the audio is transcribed
    on DEVICE with CHUNK_SECONDS / CHUNK_OVERLAP_SECONDS and the resulting
    dictionary is saved to that file. The "transcriptions" folder is created
    first when missing.

    Args:
        input_audio_path (str): Path to audio file
        source_languaje (str): Key into the dictionary returned by
            get_language_dict(); its "transcriber" entry is passed to
            transcribe()

    Returns:
        list | tuple: Update for video_transcribed_progress_info ("Loaded" or
        "Ok") and update carrying the transcription dictionary

    Raises:
        KeyError: If source_languaje is missing from the language dictionary
    """
    print('*'*NUMBER)
    print(f"Transcript {input_audio_path}")

    # Transcription file
    transcription_file = f"transcriptions/transcription_{source_languaje}.json"

    if DEBUG and os.path.exists(transcription_file):
        # Context manager so the handle is closed as soon as the file is read
        with open(transcription_file, "r") as f:
            transcription = json.load(f)
        return [
            gr.update(value="Loaded"),
            gr.update(value=transcription)
        ]

    # Get language dict (local name avoids shadowing the module-level language_dict)
    transcriber_languages = get_language_dict()

    # Transcribe audio file; only the dictionary form of the result is used
    _, transcription_dict = transcribe(
        input_audio_path,
        transcriber_languages[source_languaje]["transcriber"],
        DEVICE,
        CHUNK_SECONDS,
        CHUNK_OVERLAP_SECONDS,
    )

    # Save transcription; the target folder may not exist yet
    os.makedirs(os.path.dirname(transcription_file), exist_ok=True)
    with open(transcription_file, "w") as f:
        json.dump(transcription_dict, f)

    return (
        gr.update(value="Ok"),
        gr.update(value=transcription_dict)
    )

def concatenate_transcriptions():
    """
    Concatenate the per-chunk transcriptions and delete the per-chunk files.

    Runs "concat_transcriptions.py" on "chunks/output_files.txt" with the
    current interpreter, then removes "transcriptions/<chunk name>.srt" for
    every chunk listed in that file. The script's exit code does not change
    the returned status; a non-zero code is only printed.

    Chunk names are taken with os.path so blank lines, names containing more
    than one dot and nested paths no longer crash the loop.

    Returns:
        tuple: "Ok" status textbox and textbox holding
        "concatenated_transcriptions/download_audio.srt"

    Raises:
        FileNotFoundError: If "chunks/output_files.txt" does not exist
    """
    import subprocess
    import sys

    print('*'*NUMBER)
    print("Concatenate transcriptions")

    folder_concatenated = "concatenated_transcriptions"
    os.makedirs(folder_concatenated, exist_ok=True)

    chunck_file = "chunks/output_files.txt"
    python_file = "concat_transcriptions.py"
    # Argument list instead of a shell string: no quoting issues, no shell
    result = subprocess.run(
        [sys.executable, python_file, chunck_file, str(CHUNK_SECONDS), str(CHUNK_OVERLAP_SECONDS)],
        check=False,
    )
    if result.returncode != 0:
        print(f"{python_file} exited with code {result.returncode}")

    with open(chunck_file, 'r') as f:
        files = f.read().splitlines()

    transcriptions_folder = "transcriptions"
    transcription_extension = "srt"
    for file in files:
        if not file.strip():
            continue
        file_name = os.path.splitext(os.path.basename(file))[0]
        chunk_transcription = os.path.join(transcriptions_folder, f"{file_name}.{transcription_extension}")
        try:
            os.remove(chunk_transcription)
        except OSError as e:
            # Best effort, as with the shell rm this replaces
            print(f"Error removing {chunk_transcription}: {e}")

    audio_transcribed = "concatenated_transcriptions/download_audio.srt"

    return (
        gr.Textbox(value="Ok"),
        gr.Textbox(value=audio_transcribed),
    )

def translate_transcription(original_audio_transcribed_path, source_languaje, target_languaje):
    """
    Translate a transcription by running "translate_transcriptions.py".

    The script is run with the current interpreter and an argument list, so
    language names or paths containing spaces or shell metacharacters are
    passed through verbatim instead of being interpreted by a shell. The
    script's exit code does not change the returned status; a non-zero code
    is only printed. Afterwards
    "concatenated_transcriptions/download_audio.srt" is deleted if present.

    Args:
        original_audio_transcribed_path (str): Path of the transcription to translate
        source_languaje (str): Value passed as --source_languaje
        target_languaje (str): Value passed as --target_languaje

    Returns:
        tuple: "Ok" status textbox and textbox holding
        "translated_transcriptions/download_audio_<target_languaje>.srt"
    """
    import subprocess
    import sys

    print('*'*NUMBER)
    print("Translate transcription")

    folder_translated_transcriptions = "translated_transcriptions"
    os.makedirs(folder_translated_transcriptions, exist_ok=True)

    python_file = "translate_transcriptions.py"
    command = [
        sys.executable,
        python_file,
        str(original_audio_transcribed_path),
        "--source_languaje", str(source_languaje),
        "--target_languaje", str(target_languaje),
        "--device", DEVICE,
    ]
    result = subprocess.run(command, check=False)
    if result.returncode != 0:
        print(f"{python_file} exited with code {result.returncode}")

    translated_transcription = f"translated_transcriptions/download_audio_{target_languaje}.srt"

    # The untranslated concatenation is no longer needed
    transcription_file = "concatenated_transcriptions/download_audio.srt"
    if os.path.exists(transcription_file):
        try:
            os.remove(transcription_file)
        except OSError as e:
            # Best effort, as with the shell rm this replaces
            print(f"Error removing {transcription_file}: {e}")

    return (
        gr.Textbox(value="Ok"),
        gr.Textbox(value=translated_transcription)
    )

def add_translated_subtitles_to_video(original_video_path, original_audio_path, original_audio_translated_path):
    """
    Add subtitles to the video by running "add_subtitles_to_video.py", then
    delete the intermediate files.

    The script is run with the current interpreter and an argument list, so
    paths containing spaces or shell metacharacters are passed through
    verbatim. Its exit code does not change the returned values; a non-zero
    code is only printed. The three input files and
    "chunks/output_files.txt" are then removed when they exist.

    Args:
        original_video_path (str): Path of the source video
        original_audio_path (str): Path of the extracted audio
        original_audio_translated_path (str): Path of the translated subtitles

    Returns:
        tuple: Visible video component pointing at
        "videos/download_video_with_subtitles.mp4", a hidden "Ok" textbox and
        an "Ok" textbox
    """
    import subprocess
    import sys

    print('*'*NUMBER)
    print("Add subtitles to video")

    python_file = "add_subtitles_to_video.py"
    command = [
        sys.executable,
        python_file,
        str(original_audio_translated_path),
        str(original_video_path),
        str(original_audio_path),
    ]
    result = subprocess.run(command, check=False)
    if result.returncode != 0:
        print(f"{python_file} exited with code {result.returncode}")

    # Intermediate files are no longer needed once the script has run
    leftovers = (
        original_video_path,
        original_audio_path,
        original_audio_translated_path,
        "chunks/output_files.txt",
    )
    for leftover in leftovers:
        if leftover and os.path.exists(leftover):
            try:
                os.remove(leftover)
            except OSError as e:
                # Best effort, as with the shell rm this replaces
                print(f"Error removing {leftover}: {e}")

    subtitled_video = "videos/download_video_with_subtitles.mp4"

    visible = False
    return (
        gr.Video(value=subtitled_video, visible=True),
        gr.Textbox(value="Ok", visible=visible),
        gr.Textbox(value="Ok"),
    )

def hide_textbobes_progress_info():
    """Return six hidden textboxes, each reset to the "Waiting" value."""
    textbox_count = 6
    return tuple(
        gr.Textbox(value="Waiting", visible=False)
        for _ in range(textbox_count)
    )

def process_uploaded_video(video_path):
    """
    Store the uploaded video as "videos/download_video.mp4" and reveal the
    configuration block.

    With DEBUG enabled, an existing "videos/download_video.mp4" is kept and
    the upload is not copied over it. When there is nothing to copy
    (video_path is empty, which is what the change event delivers after the
    video input is cleared), no component is modified; previously this
    raised inside shutil.copy.

    Args:
        video_path (str | None): Path of the uploaded video

    Returns:
        list: Updates for video_input, config_block, original_video_path and
        html_subtify_logo_component
    """
    # Create videos folder
    videos_folder = "videos"
    os.makedirs(videos_folder, exist_ok=True)

    stored_video_path = os.path.join(videos_folder, "download_video.mp4")

    reuse_stored_video = DEBUG and os.path.exists(stored_video_path)
    if not reuse_stored_video:
        if not video_path:
            # Nothing was uploaded: leave all four outputs untouched
            return [gr.update(), gr.update(), gr.update(), gr.update()]
        # Copy uploaded video to videos folder
        shutil.copy(video_path, stored_video_path)

    return [
        gr.update(label="Video uploaded"),  # video_input
        gr.update(visible=True),  # config_block
        gr.update(value=stored_video_path),  # original_video_path
        gr.update(value=html_subtify_logo_small)  # html_subtify_logo_component
    ]

def _has_numeric_timestamps(item):
    """Return True when item has numeric 'start' and 'end' values."""
    return (isinstance(item.get('start'), (int, float)) and
            isinstance(item.get('end'), (int, float)))


def _find_speaker(word_start, word_end, diarization):
    """Return the speaker of the first segment that matches the word, or None.

    A segment matches when it fully contains the word's time range, or when
    it overlaps more than half of the word's duration. Segments without
    numeric timestamps are ignored.
    """
    word_duration = word_end - word_start
    for segment in diarization:
        if not _has_numeric_timestamps(segment):
            continue

        segment_start = float(segment['start'])
        segment_end = float(segment['end'])

        # Word entirely inside the segment
        if word_start >= segment_start and word_end <= segment_end:
            return segment['speaker']

        # Word mostly inside the segment (>50% of its duration)
        overlap_duration = max(0, min(word_end, segment_end) - max(word_start, segment_start))
        if overlap_duration > word_duration * 0.5:
            return segment['speaker']
    return None


def merge_transcription_and_diarization(source_languaje="English"):
    """
    Merge transcription and diarization results to assign speakers to each word.

    Reads "transcriptions/transcription_<source_languaje>.json" and
    "diarization/diarization.json", labels every transcription chunk that has
    numeric 'start'/'end' values with a speaker ('UNKNOWN' when no segment
    matches), and writes the result to
    "merged_transcription_diarization/merged.json". With DEBUG enabled, an
    existing merged file is loaded and returned instead.

    Fixes over the previous version: the output path is defined even when
    DEBUG is off, and the output *folder* is created rather than a folder
    named like the output file (which made the following write fail).

    Args:
        source_languaje (str): Language whose transcription file is merged.
            Defaults to "English", the file this step always read before.

    Returns:
        list: Status update ("Loaded" or "Ok") and update carrying the merged
        dictionary with keys 'text' and 'chunks'
    """
    print('*'*NUMBER)
    print("Merge transcription and diarization")

    merged_transcription_path = "merged_transcription_diarization/merged.json"

    # isfile, not exists: earlier runs may have left a folder with this name
    if DEBUG and os.path.isfile(merged_transcription_path):
        with open(merged_transcription_path, 'r', encoding='utf-8') as f:
            merged_transcription = json.load(f)
        return [
            gr.update(value="Loaded"),
            gr.update(value=merged_transcription)
        ]

    # Load JSON files
    transcription_path = f"transcriptions/transcription_{source_languaje}.json"
    diarization_path = "diarization/diarization.json"
    with open(transcription_path, 'r') as f:
        transcription = json.load(f)
    with open(diarization_path, 'r') as f:
        diarization = json.load(f)

    # Build the combined chunks, skipping chunks without valid timestamps
    merged_chunks = []
    for chunk in transcription.get('chunks', []):
        if not _has_numeric_timestamps(chunk):
            continue

        word_start = float(chunk['start'])
        word_end = float(chunk['end'])
        speaker = _find_speaker(word_start, word_end, diarization)

        merged_chunks.append({
            'start': word_start,
            'end': word_end,
            'text': chunk['text'],
            'speaker': speaker if speaker else 'UNKNOWN'
        })

    # Create final dictionary
    merged_transcription = {
        'text': transcription.get('text', ''),
        'chunks': merged_chunks
    }

    # Create the output folder if it doesn't exist
    os.makedirs(os.path.dirname(merged_transcription_path), exist_ok=True)
    # Drop the empty folder the previous version created at the file's path
    if os.path.isdir(merged_transcription_path):
        os.rmdir(merged_transcription_path)

    # Save result
    with open(merged_transcription_path, 'w', encoding='utf-8') as f:
        json.dump(merged_transcription, f, ensure_ascii=False, indent=2)

    return [
        gr.update(value="Ok"),
        gr.update(value=merged_transcription)
    ]

@spaces.GPU
def subtify():
    """
    Build the Subtify Gradio interface, wire its events and launch it.

    The pipeline is chained through component change events: clicking the
    subtify button updates auxiliar_block1, whose change runs get_audio;
    each step then writes a progress textbox whose change starts the next
    step (trascribe_audio, diarize, merge_transcription_and_diarization).
    The translation and subtitling steps are present below but commented out.

    Changes over the previous version: the theme sets
    button_secondary_border_color_dark (it set
    button_primary_background_fill_dark with the secondary border colour,
    breaking the light/dark pairing used by every other entry), button
    min_width values are integers, and an unused list of speaker counts was
    removed.

    NOTE(review): @spaces.GPU wraps this UI builder rather than the steps
    that run on DEVICE; confirm that is intended.
    """
    theme = gr.themes.Default().set(
        body_background_fill=BACKGROUND_COLOR,
        body_background_fill_dark=BACKGROUND_COLOR,
        body_text_color=PRIMARY_TEXT_COLOR,
        body_text_color_dark=PRIMARY_TEXT_COLOR,
        body_text_color_subdued=SUBDUED_TEXT_COLOR,
        body_text_color_subdued_dark=SUBDUED_TEXT_COLOR,
        background_fill_primary=BACKGROUND_PRIMARY_COLOR,
        background_fill_primary_dark=BACKGROUND_PRIMARY_COLOR,
        background_fill_secondary=BACKGROUND_SECONDARY_COLOR,
        background_fill_secondary_dark=BACKGROUND_SECONDARY_COLOR,
        border_color_primary=PRIMARY_BODER_COLOR,
        border_color_primary_dark=PRIMARY_BODER_COLOR,
        block_background_fill=BACKGROUND_PRIMARY_COLOR,
        block_background_fill_dark=BACKGROUND_PRIMARY_COLOR,
        block_title_text_color=BLOCK_TITLE_TEXT_COLOR,
        block_title_text_color_dark=BLOCK_TITLE_TEXT_COLOR,
        input_background_fill=INPUT_BACKGROUND_COLOR,
        input_background_fill_dark=INPUT_BACKGROUND_COLOR,
        input_border_color=INPUT_BORDER_COLOR,
        input_border_color_dark=INPUT_BORDER_COLOR,
        input_placeholder_color=INPUT_PLACEHOLDER_COLOR,
        input_placeholder_color_dark=INPUT_PLACEHOLDER_COLOR,
        error_background_fill=ERROR_BACKGROUND_COLOR,
        error_background_fill_dark=ERROR_BACKGROUND_COLOR,
        error_text_color=ERROR_TEXT_COLOR,
        error_text_color_dark=ERROR_TEXT_COLOR,
        error_border_color=ERROR_BORDER_COLOR,
        error_border_color_dark=ERROR_BORDER_COLOR,
        button_secondary_background_fill=BUTTON_SECONDARY_BACKGROUND_COLOR,
        button_secondary_background_fill_dark=BUTTON_SECONDARY_BACKGROUND_COLOR,
        button_secondary_border_color=BUTTON_SECONDARY_BORDER_COLOR,
        button_secondary_border_color_dark=BUTTON_SECONDARY_BORDER_COLOR,
        button_secondary_text_color=BUTTON_SECONDARY_TEXT_COLOR,
        button_secondary_text_color_dark=BUTTON_SECONDARY_TEXT_COLOR,
    )

    with gr.Blocks(theme=theme) as demo:
        # Layout
        gr.HTML(html_social_media)
        gr.HTML("<h1 style='text-align: center;'>Subtify</h1>")
        html_subtify_logo_component = gr.HTML(html_subtify_logo_big)

        # Input block, where the user can upload a video and configure the subtify process
        visible = False
        input_block = gr.Row(variant="panel")
        with input_block:
            input_video_block = gr.Row(scale=2)
            with input_video_block:
                video_input = gr.Video(
                    label="Upload video",
                    sources=["upload"],
                    scale=1,
                    interactive=True
                )
                delete_button = gr.Button(size="sm", icon="icons/delete.svg", value="clear", min_width=10, scale=0)

            # Hidden until a video has been uploaded (see process_uploaded_video)
            config_block = gr.Column(scale=1, visible=visible)
            with config_block:
                with gr.Row():
                    source_languaje = gr.Dropdown(visible=True, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True, info="Language of the video")
                    target_languaje = gr.Dropdown(visible=True, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True, info="Language to translate the subtitles")
                with gr.Accordion("Advanced settings", open=True, visible=True) as Advanced_setings:
                    number_of_speakers = gr.Number(visible=True, label="Number of speakers", show_label=True, value=0, interactive=True, info="Number of speakers in the video, if you don't know, select 0")
                    min_speakers = gr.Number(visible=True, label="Min speakers", show_label=True, value=0, scale=0, interactive=True, info="Minimum number of speakers in the video")
                    max_speakers = gr.Number(visible=True, label="Max speakers", show_label=True, value=0, scale=0, interactive=True, info="Maximum number of speakers in the video")
                subtify_button = gr.Button(size="lg", value="subtify", min_width=10, scale=0, visible=True)

        # Hidden textbox whose change event starts the pipeline
        auxiliar_block1 = gr.Textbox(placeholder="", interactive=False, visible=visible)
        # One progress textbox per pipeline step; each step's change event triggers the next step
        with gr.Row():
            get_audio_from_video_info = gr.Textbox(placeholder="Waiting", label="Get audio from video info", elem_id="get_audio_from_video_info", interactive=False, visible=visible)
            video_transcribed_progress_info = gr.Textbox(placeholder="Waiting", label="Transcribe progress info", elem_id="video_transcribed_progress_info", interactive=False, visible=visible)
            diarization_progress_info = gr.Textbox(placeholder="Waiting", label="Diarize progress info", elem_id="diarization_progress_info", interactive=False, visible=visible)
            merged_transcription_progress_info = gr.Textbox(placeholder="Waiting", label="Merge transcription and diarization progress info", elem_id="merged_transcription_progress_info", interactive=False, visible=visible)
            transcriptions_concatenated_progress_info = gr.Textbox(placeholder="Waiting", label="Concatenate progress info", elem_id="transcriptions_concatenated_progress_info", interactive=False, visible=visible)
            video_translated_progress_info = gr.Textbox(placeholder="Waiting", label="Translate progress info", elem_id="transcription_translated_progress_info", interactive=False, visible=visible)
            video_subtitled_progress_info = gr.Textbox(placeholder="Waiting", label="Video subtitle progress info", elem_id="video_subtitled_progress_info", interactive=False, visible=visible)

        # Components carrying paths and intermediate results between steps
        original_audio_path = gr.Textbox(label="Original audio path", elem_id="original_audio_path", visible=visible)
        original_video_path = gr.Textbox(label="Original video path", visible=visible)
        transcription = gr.Textbox(label="transcription", elem_id="transcription", visible=VIEW_OUTPUTS)
        diarization = gr.Textbox(label="diarization", elem_id="diarization", visible=VIEW_OUTPUTS)
        merged_transcription = gr.Textbox(label="merged_transcription", elem_id="merged_transcription", visible=VIEW_OUTPUTS)
        original_audio_translated_path = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", visible=visible)
        subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=visible)
        auxiliar_block3 = gr.Textbox(placeholder="Waiting", label="Auxiliar block 3", elem_id="auxiliar_block3", interactive=False, visible=visible)

        gr.HTML(html_buy_me_a_coffe)

        # Events
        # paste_button.click(fn=paste_url_from_clipboard, outputs=url_textbox)
        delete_button.click(
            fn=reset_frontend,
            outputs=[
                video_input,
                source_languaje,
                target_languaje,
                Advanced_setings,
                number_of_speakers,
                subtify_button,
                auxiliar_block1,
                video_transcribed_progress_info,
                transcriptions_concatenated_progress_info,
                video_translated_progress_info,
                video_subtitled_progress_info,
                subtitled_video,
            ]
        )
        video_input.change(
            fn=process_uploaded_video,
            inputs=[video_input],
            outputs=[video_input, config_block, original_video_path, html_subtify_logo_component]
        )
        subtify_button.click(
            fn=change_visibility_texboxes,
            outputs=[auxiliar_block1, get_audio_from_video_info, merged_transcription_progress_info, video_transcribed_progress_info, diarization_progress_info, transcriptions_concatenated_progress_info, video_translated_progress_info, video_subtitled_progress_info]
        )
        auxiliar_block1.change(
            fn=get_audio,
            inputs=[original_video_path],
            outputs=[get_audio_from_video_info, original_audio_path]
        )
        get_audio_from_video_info.change(
            fn=trascribe_audio,
            inputs=[original_audio_path, source_languaje],
            outputs=[video_transcribed_progress_info, transcription]
        )
        video_transcribed_progress_info.change(
            fn=diarize,
            inputs=[original_audio_path, number_of_speakers, min_speakers, max_speakers],
            outputs=[diarization_progress_info, diarization]
        )
        diarization_progress_info.change(
            fn=merge_transcription_and_diarization,
            outputs=[merged_transcription_progress_info, merged_transcription]
        )
        # transcriptions_concatenated_progress_info.change(
        #     fn=translate_transcription,
        #     inputs=[original_audio_transcribed_path, source_languaje, target_languaje],
        #     outputs=[video_translated_progress_info, original_audio_translated_path]
        # )
        # video_translated_progress_info.change(
        #     fn=add_translated_subtitles_to_video,
        #     inputs=[original_video_path, original_audio_path, original_audio_translated_path],
        #     outputs=[subtitled_video, video_subtitled_progress_info, auxiliar_block3]
        # )
        # auxiliar_block3.change(
        #     fn=hide_textbobes_progress_info,
        #     outputs=[video_sliced_progress_info, video_transcribed_progress_info, transcriptions_concatenated_progress_info, video_translated_progress_info, video_subtitled_progress_info]
        # )

    demo.launch()


if __name__ == "__main__":
    # Command-line entry point: launch the UI by default, or run one of the
    # alternative modes selected by a flag.
    parser = argparse.ArgumentParser()
    parser.add_argument("--no_ui", action="store_true", help="run without the Gradio interface")
    parser.add_argument("--remove_all_files", action="store_true", help="delete the temporary folders and exit")
    args = parser.parse_args()

    if args.no_ui:
        # subtify_no_ui is not defined in the visible part of this module, so
        # look it up instead of failing with a NameError; it is still called
        # when some other part of the file provides it.
        no_ui_entry_point = globals().get("subtify_no_ui")
        if not callable(no_ui_entry_point):
            parser.error("--no_ui is not available: subtify_no_ui() is not defined in this module")
        no_ui_entry_point()
    elif args.remove_all_files:
        remove_all_files()
    else:
        subtify()