Spaces:

wissemkarous
/

PFA-Demo

Sleeping

File size: 8,014 Bytes

# import streamlit as st
# import os
# from utils.demo import load_video, ctc_decode
# from utils.two_stream_infer import load_model
# import os
# from scripts.extract_lip_coordinates import generate_lip_coordinates
# import options as opt

# st.set_page_config(layout="wide")

# model = load_model()

# st.title("Lipreading final year project Demo")

# st.info(
#     "The inference speed is very slow on Huggingface spaces due to it being processed entirely on CPU ",
#     icon="ℹ️",
# )
# # Generating a list of options or videos
# options = os.listdir(os.path.join("app_input"))
# selected_video = st.selectbox("Choose video", options)

# col1, col2 = st.columns(2)


# with col1:
#     file_path = os.path.join("app_input", selected_video)
#     video_name = selected_video.split(".")[0]
#     os.system(f"ffmpeg -i {file_path} -vcodec libx264 {video_name}.mp4 -y")

#     # Rendering inside of the app
#     video = open(f"{video_name}.mp4", "rb")
#     video_bytes = video.read()
#     st.video(video_bytes)


# with col1, st.spinner("Splitting video into frames"):
#     video, img_p, files = load_video(f"{video_name}.mp4", opt.device)
#     prediction_video = video
#     st.markdown(f"Frames Generated:\n{files}")
#     frames_generated = True
# with col1, st.spinner("Generating Lip Landmark Coordinates"):
#     coordinates = generate_lip_coordinates(f"{video_name}_samples")
#     prediction_coordinates = coordinates
#     st.markdown(f"Coordinates Generated:\n{coordinates}")
#     coordinates_generated = True

# with col2:
#     st.info("Ready to make prediction!")
#     generate = st.button("Generate")
#     if generate:
#         with col2, st.spinner("Generating..."):
#             y = model(
#                 prediction_video[None, ...].to(opt.device),
#                 prediction_coordinates[None, ...].to(opt.device),
#             )
#             txt = ctc_decode(y[0])
#             st.text(txt[-1])
            
# st.info("Author ©️ : wissem karous ")
# st.info("Made with ❤️  ")

# ///////////////////
# import streamlit as st
# import os
# from utils.demo import load_video, ctc_decode
# from utils.two_stream_infer import load_model
# from scripts.extract_lip_coordinates import generate_lip_coordinates
# import options as opt

# st.set_page_config(layout="wide")

# model = load_model()

# st.title("Lipreading final year project Demo")

# st.info(
#     "The inference speed is very slow on Huggingface spaces due to it being processed entirely on CPU ",
#     icon="ℹ️",
# )

# # Generating a list of options or videos
# options = sorted(os.listdir(os.path.join("app_input")))  # Ensure the list is sorted
# selected_video = st.selectbox("Choose video", options)

# # Find the index of the selected video and calculate the index of the next video
# selected_index = options.index(selected_video)
# next_video_index = (selected_index + 1) % len(options)  # Ensures looping back to start
# next_video = options[next_video_index]

# col1, col2 = st.columns(2)

# # Function to display video in a column
# def display_video(column, video_path, video_name):
#     os.system(f"ffmpeg -i {video_path} -vcodec libx264 {video_name}.mp4 -y")
#     video = open(f"{video_name}.mp4", "rb")
#     video_bytes = video.read()
#     column.video(video_bytes)

# # Displaying the selected video in the first column
# with col1:
#     file_path = os.path.join("app_input", selected_video)
#     video_name = selected_video.split(".")[0]
#     display_video(col1, file_path, video_name)
# # Displaying the next video in the second column
# with col2:
#     st.info("Expected Result !")
#     next_file_path = os.path.join("app_input", next_video)
#     next_video_name = next_video.split(".")[0]
#     display_video(col2, next_file_path, next_video_name)


# # Assuming further processing (like generating predictions) is only intended for the first (selected) video
# with col1, st.spinner("Processing video..."):
#     video, img_p, files = load_video(f"{video_name}.mp4", opt.device)
#     coordinates = generate_lip_coordinates(f"{video_name}_samples")
#     # Assuming 'frames_generated' and 'coordinates_generated' are used for control flow or further processing
#     frames_generated = True
#     coordinates_generated = True
#     if frames_generated and coordinates_generated:
#         st.markdown(f"Frames Generated for {video_name}:\n{files}")
#         st.markdown(f"Coordinates Generated for {video_name}:\n{coordinates}")

# with col2:
#     st.info("Ready to make prediction!")
#     generate = st.button("Generate")
#     if generate:
#         with st.spinner("Generating..."):
#             y = model(
#                 video[None, ...].to(opt.device),
#                 coordinates[None, ...].to(opt.device),
#             )
#             txt = ctc_decode(y[0])
#             st.text(txt[-1])

# st.info("Author ©️ : Wissem Karous ")
# st.info("Made with ❤️")
# /////////////////////

import streamlit as st
import os
import cv2
from utils.demo import load_video, ctc_decode
from utils.two_stream_infer import load_model
from scripts.extract_lip_coordinates import generate_lip_coordinates
import options as opt

st.set_page_config(layout="wide")

# NOTE(review): Streamlit re-runs this script on each widget interaction, so
# unless load_model() caches internally the model is rebuilt every time —
# consider wrapping it with st.cache_resource if the installed version has it.
model = load_model()

st.title("Lipreading Final Year Project Demo")

st.info(
    "The inference speed is very slow on Huggingface spaces due to it being processed entirely on CPU ",
    icon="ℹ️",
)

# Build the list of selectable videos. Sorting keeps the order stable so the
# "next video" computed below is deterministic between runs.
options = sorted(os.listdir("app_input")) if os.path.isdir("app_input") else []

# With no entries, st.selectbox would yield None and options.index(None)
# would raise ValueError; report the problem and end this run cleanly instead.
if not options:
    st.error("No videos found in the 'app_input' directory.")
    st.stop()

selected_video = st.selectbox("Choose video", options)

# Locate the selection and pick the entry that follows it, wrapping around to
# the first entry after the last one.
selected_index = options.index(selected_video)
next_video_index = (selected_index + 1) % len(options)
next_video = options[next_video_index]

# Two side-by-side containers used by the rest of the script.
col1, col2 = st.columns(2)

# Function to display video in a column with resizing
def display_video(column, video_path, video_name, new_width=320):
    """Render every frame of a video into a Streamlit column at a fixed width.

    Each decoded frame is resized to ``new_width`` pixels wide (height scaled
    to keep the aspect ratio) and emitted with ``column.image``.

    Args:
        column: Streamlit container (e.g. an item returned by ``st.columns``)
            that receives the frames, or an error message on failure.
        video_path: Path of the video file to open with OpenCV.
        video_name: Unused; kept so existing call sites keep working.
        new_width: Target frame width in pixels. Defaults to 320.

    Raises:
        ValueError: If ``new_width`` is not a positive number.
    """
    if new_width <= 0:
        raise ValueError(f"new_width must be positive, got {new_width}")

    cap = cv2.VideoCapture(video_path)
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # A file that cannot be opened or decoded reports 0 for its dimensions;
        # bail out here instead of dividing by zero in the scale computation.
        if not cap.isOpened() or width <= 0 or height <= 0:
            column.error(f"Could not read video: {video_path}")
            return

        # Keep the aspect ratio; never let truncation produce a 0-pixel height.
        new_height = max(1, int((new_width / width) * height))

        # NOTE(review): every frame is added as a separate image element, so
        # frames stack vertically in the column rather than replacing each
        # other — confirm this is the intended presentation.
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.resize(frame, (new_width, new_height))
            column.image(frame, channels="BGR")
    finally:
        # Always free the capture handle, even if resizing/rendering raises.
        cap.release()

# Left column: render the frames of the video chosen in the selectbox.
with col1:
    file_path = os.path.join("app_input", selected_video)
    # Base name is the text before the FIRST dot, so "a.b.mp4" yields "a".
    # This name is reused further down to build the processing inputs.
    video_name = selected_video.split(".")[0]
    display_video(col1, file_path, video_name)

# Right column: render the file that follows the selection in the sorted
# listing (wrapping to the first entry), labelled as the expected result.
with col2:
    st.info("Expected Result !")
    next_file_path = os.path.join("app_input", next_video)
    next_video_name = next_video.split(".")[0]
    display_video(col2, next_file_path, next_video_name)

# Pre-processing is done only for the selected (left-column) video; the
# spinner is shown inside col1 while both helpers run.
# NOTE(review): load_video is given "<video_name>.mp4" relative to the working
# directory, not file_path under app_input. Nothing in the active code creates
# that file (the commented-out earlier version produced it with ffmpeg) —
# confirm the file exists at runtime or that load_video handles this.
with col1, st.spinner("Processing video..."):
    # img_p is not used anywhere else in this script.
    video, img_p, files = load_video(f"{video_name}.mp4", opt.device)
    # presumably load_video leaves its frames in "<video_name>_samples" and
    # this helper reads them from there — TODO confirm against the helpers.
    coordinates = generate_lip_coordinates(f"{video_name}_samples")
    # Both flags are unconditionally set to True, so the check below always
    # passes; they are not read anywhere else in this script.
    frames_generated = True
    coordinates_generated = True
    if frames_generated and coordinates_generated:
        st.markdown(f"Frames Generated for {video_name}:\n{files}")
        st.markdown(f"Coordinates Generated for {video_name}:\n{coordinates}")

# Right column: run the model on the pre-processed inputs when the user
# presses the button, and show the decoded text.
with col2:
    st.info("Ready to make prediction!")
    generate = st.button("Generate")
    if generate:
        with st.spinner("Generating..."):
            # [None, ...] prepends an axis to each input (presumably the batch
            # dimension the model expects — TODO confirm) before moving it to
            # the configured device.
            y = model(
                video[None, ...].to(opt.device),
                coordinates[None, ...].to(opt.device),
            )
            # Decode the first item of the model output and display the last
            # entry of the decoder's result.
            txt = ctc_decode(y[0])
            st.text(txt[-1])

# Footer banners, rendered below the two columns.
st.info("Author ©️ : Wissem Karous ")
st.info("Made with ❤️")