File size: 3,260 Bytes
b71b6bf
 
 
 
 
 
 
466df4f
b71b6bf
7b4a4e7
b71b6bf
 
 
 
 
 
 
 
 
 
 
 
 
 
abd5868
 
 
e932cdf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b71b6bf
 
 
 
 
 
c14bad7
 
b71b6bf
 
c14bad7
b71b6bf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0e7972e
e932cdf
 
4c7d0b3
b71b6bf
 
 
 
 
 
4c7d0b3
 
b71b6bf
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import gradio as gr
import huggingface_hub
import os 
import subprocess
import threading

# download model
# Fetch the pretrained VTA-LDM checkpoint from the Hugging Face Hub at import
# time, mirroring it into ./ckpt so the inference subprocess launched by
# infer() can load it from a local path. Blocks startup until complete.
huggingface_hub.snapshot_download(
    repo_id='ariesssxu/vta-ldm-clip4clip-v-large',
    local_dir='./ckpt/vta-ldm-clip4clip-v-large'
)

def stream_output(pipe):
    """Echo every line read from *pipe* to stdout until EOF.

    Used as a thread target to drain a subprocess pipe so the child
    never blocks on a full pipe buffer.
    """
    while True:
        line = pipe.readline()
        if line == '':  # readline returns '' only at EOF in text mode
            break
        print(line, end='')

def print_directory_contents(path):
    """Print a tree-style listing of *path* to stdout for debugging.

    Each directory is printed as ``name/`` indented 4 spaces per depth
    level, with its files indented one level further.

    Args:
        path: Root directory to walk.
    """
    for root, dirs, files in os.walk(path):
        # Fix: the original computed depth with root.replace(path, ''),
        # which substitutes *every* occurrence of the path substring and
        # miscounts whenever the root path string recurs inside a
        # subdirectory name. relpath is exact.
        rel = os.path.relpath(root, path)
        level = 0 if rel == os.curdir else rel.count(os.sep) + 1
        indent = ' ' * 4 * level
        print(f"{indent}{os.path.basename(root)}/")
        subindent = ' ' * 4 * (level + 1)
        for f in files:
            print(f"{subindent}{f}")

# Print the ckpt directory contents
print_directory_contents('./ckpt')

def get_wav_files(path):
    """Walk *path*, print a tree listing, and collect ``.wav`` file paths.

    Args:
        path: Root directory to search.

    Returns:
        Sorted list of full paths to every ``.wav`` file under *path*
        (case-insensitive extension match). Sorting makes the result
        deterministic — os.walk yields files in arbitrary order, and the
        caller picks element [0].
    """
    wav_files = []
    for root, dirs, files in os.walk(path):
        # Fix: same depth bug as print_directory_contents — replace()
        # substitutes every occurrence of the path substring; use relpath.
        rel = os.path.relpath(root, path)
        level = 0 if rel == os.curdir else rel.count(os.sep) + 1
        indent = ' ' * 4 * level
        print(f"{indent}{os.path.basename(root)}/")
        subindent = ' ' * 4 * (level + 1)
        for f in files:
            file_path = os.path.join(root, f)
            if f.lower().endswith('.wav'):
                wav_files.append(file_path)
                print(f"{subindent}{file_path}")
            else:
                print(f"{subindent}{f}")
    return sorted(wav_files)

def infer(video_in):
    """Run the video-to-audio inference script on an uploaded video.

    Args:
        video_in: Filesystem path to the video Gradio stored in its temp
            directory.

    Returns:
        Path to the first generated ``.wav`` file under ``./outputs/tmp``.

    Raises:
        gr.Error: If the inference subprocess produced no ``.wav`` output.
    """
    print(f"VIDEO IN PATH: {video_in}")
    # inference_from_video.py consumes a directory, so hand it the gradio
    # temp folder that contains the uploaded video file.
    folder_path = os.path.dirname(video_in)

    # Launch the inference script as a subprocess; bufsize=1 gives
    # line-buffered text pipes so output streams promptly.
    command = [
        'python', 'inference_from_video.py',
        '--original_args', 'ckpt/vta-ldm-clip4clip-v-large/summary.jsonl',
        '--model', 'ckpt/vta-ldm-clip4clip-v-large/pytorch_model_2.bin',
        '--data_path', folder_path,
    ]
    process = subprocess.Popen(command, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE, text=True, bufsize=1)

    # Drain stdout and stderr concurrently so neither pipe fills its
    # buffer and deadlocks the child.
    stdout_thread = threading.Thread(target=stream_output, args=(process.stdout,))
    stderr_thread = threading.Thread(target=stream_output, args=(process.stderr,))
    stdout_thread.start()
    stderr_thread.start()

    # Wait for the process to complete and the reader threads to finish.
    process.wait()
    stdout_thread.join()
    stderr_thread.join()

    print("Inference script finished with return code:", process.returncode)

    # Results are written to the script's default output dir "./outputs/tmp".
    print_directory_contents('./outputs/tmp')
    wave_files = get_wav_files('./outputs/tmp')
    print(wave_files)
    if not wave_files:
        # Fix: the original indexed wave_files[0] unconditionally, which
        # raised a bare IndexError whenever inference failed or produced
        # no audio. Raise a user-visible Gradio error instead
        # (demo.launch uses show_error=True).
        raise gr.Error(
            f"Inference produced no audio (exit code {process.returncode})."
        )
    return wave_files[0]

# UI layout: one column holding a video input, a submit button, and an
# audio player for the generated soundtrack. The click handler wires the
# video straight into infer().
with gr.Blocks() as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("# Video-To-Audio")
        video_in = gr.Video(label='Video IN')
        submit_btn = gr.Button("Submit")
        output_sound = gr.Audio(label="Audio OUT")
    submit_btn.click(
        fn=infer,
        inputs=[video_in],
        outputs=[output_sound],
        show_api=False,
    )
# Hide the API page but surface gr.Error messages in the UI.
demo.launch(show_api=False, show_error=True)