import logging
import queue
from typing import List, NamedTuple
import av
import cv2
import numpy as np
import streamlit as st
from streamlit_webrtc import WebRtcMode, webrtc_streamer
from sample_utils.turn import get_ice_servers
from cvzone.HandTrackingModule import HandDetector
from cvzone.SelfiSegmentationModule import SelfiSegmentation
import os
import time

# Logger Setup
logger = logging.getLogger(__name__)

# Page chrome: configure the page first, then render the heading and the
# short usage hint shown under it.
st.set_page_config(page_icon="🏋️", page_title="Virtual Keyboard")
st.title("Interactive Virtual Keyboard")
st.subheader(
    """Turn on the webcam and use hand gestures to interact with the virtual keyboard.
Use 'a' and 'd' from the keyboard to change the background."""
)

# cvzone helpers shared by the whole script: a hand tracker limited to one
# hand with a 0.85 detection confidence, and a selfie segmentor.
detector = HandDetector(detectionCon=0.85, maxHands=1)
segmentor = SelfiSegmentation()

# Virtual keyboard layout: three rows of ten single-character keys each,
# expanded from one string per row.
keys = [list(row) for row in ("QWERTYUIOP", "ASDFGHJKL;", "ZXCVBNM,./")]

class Button:
    """One key of the virtual keyboard.

    Attributes:
        pos: Position of the button, stored exactly as passed in
            (presumably the (x, y) pixel of its top-left corner -- TODO
            confirm against the drawing code).
        text: Text associated with the button (presumably one of the
            entries of ``keys``).
        size: Dimensions of the button; defaults to ``[100, 100]``
            (presumably [width, height] in pixels).
    """

    def __init__(self, pos, text, size=None):
        """Store the button geometry and label.

        Args:
            pos: Position of the button.
            text: Text associated with the button.
            size: Optional dimensions. When omitted, a new ``[100, 100]``
                list is created for this instance.
        """
        self.pos = pos
        self.text = text
        # Build the default per instance: a list literal in the signature
        # would be one shared object, so resizing one button would silently
        # resize every other button created with the default.
        self.size = [100, 100] if size is None else size

class Detection(NamedTuple):
    """Immutable record describing one detected object in a video frame.

    Instances are created in ``video_frame_callback`` (one per detected
    hand) and handed to the rest of the app through ``result_queue``.
    """

    # Name of the detected object; the frame callback uses "Hand".
    label: str
    # Score attached to the detection by the frame callback.
    score: float
    # Bounding box; the frame callback stores it as
    # [x, y, x + width, y + height] computed from the hand's 'bbox'.
    box: np.ndarray

# Queue of per-frame detection lists; filled by video_frame_callback.
result_queue: "queue.Queue[List[Detection]]" = queue.Queue()

# Load background images
# isdir (not exists) so that a regular file named 'model/street' is treated
# as "missing" instead of making os.listdir raise NotADirectoryError.
listImg = os.listdir('model/street') if os.path.isdir('model/street') else []
if not listImg:
    st.error("Error: 'street' directory is missing or empty. Please add background images.")
    st.stop()

# cv2.imread returns None for files it cannot decode (non-images, corrupt
# files), so drop those entries before anything indexes into the list.
imgList = [cv2.imread(f'model/street/{imgPath}') for imgPath in listImg]
imgList = [img for img in imgList if img is not None]
if not imgList:
    # The directory had entries, but none of them was a readable image.
    # Stop here rather than continue with an empty background list.
    st.error("Error: no readable images found in the 'street' directory. Please add valid background images.")
    st.stop()

# Index of the background currently selected from imgList, and the text
# typed so far (module-level defaults).
indexImg = 0
output_text = ""

# Create the session entry once so it survives Streamlit reruns.
if "output_text" not in st.session_state:
    st.session_state["output_text"] = ""

# Video Frame Callback
def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
    """Detect hands in one video frame and publish the detections.

    The frame is converted to a BGR array, passed through the module-level
    hand ``detector`` (which also draws its overlay on the image), and one
    ``Detection`` per detected hand is pushed onto ``result_queue`` as a
    list. An empty list is pushed when no hand is found.

    This function is passed to ``webrtc_streamer`` as its frame callback.
    streamlit-webrtc runs such callbacks outside the Streamlit script
    thread, so no ``st.*`` API (including ``st.session_state``) is used
    here; results leave the callback only through ``result_queue``.

    Args:
        frame: Incoming video frame.

    Returns:
        A new frame built from the annotated BGR image.
    """
    img = frame.to_ndarray(format="bgr24")
    hands, img = detector.findHands(img, draw=True)

    detections = []
    for hand in hands or []:
        # 'bbox' is (x, y, width, height); convert to corner coordinates.
        x, y, width, height = hand['bbox'][:4]
        box = np.array([x, y, x + width, y + height])
        # NOTE(review): cvzone's hand dict appears to carry only
        # lmList/bbox/center/type, so a mandatory hand['score'] lookup
        # raises KeyError for every detected hand. Use the score when it is
        # present and fall back to 0.0 ("not reported") otherwise.
        score = hand.get('score', 0.0)
        detections.append(Detection(label="Hand", score=score, box=box))

    result_queue.put(detections)
    return av.VideoFrame.from_ndarray(img, format="bgr24")

# WebRTC Streamer
# Connection settings for the peer connection: ICE servers come from the
# project's TURN helper, and the transport policy is set to "relay".
_rtc_configuration = {
    "iceServers": get_ice_servers(),
    "iceTransportPolicy": "relay",
}

# Request the camera only; audio is not captured.
_media_stream_constraints = {"video": True, "audio": False}

# Start the send/receive stream; every frame goes through
# video_frame_callback, with async processing enabled.
webrtc_streamer(
    key="virtual-keyboard",
    mode=WebRtcMode.SENDRECV,
    rtc_configuration=_rtc_configuration,
    media_stream_constraints=_media_stream_constraints,
    video_frame_callback=video_frame_callback,
    async_processing=True,
)