"""Live Video AI Assistant.

Captures frames from the local webcam and asks a local LLaVA model
(served by Ollama) questions about the current frame through a Gradio UI.
"""

import threading

import cv2
import gradio as gr
import ollama

# Single shared capture device. Guarded by a lock because Gradio handlers
# (and any future frame readers) may run on different threads, and
# cv2.VideoCapture is not thread-safe.
cap = cv2.VideoCapture(0)
_cap_lock = threading.Lock()


def query_the_image(query: str, image_list: list[bytes]) -> str:
    """Send *query* plus JPEG-encoded frames to the local LLaVA model.

    Args:
        query: The user's question about the image(s).
        image_list: JPEG-encoded frames (raw bytes, as produced by get_frame).

    Returns:
        The model's text answer, or an ``"Error: ..."`` string on failure.
    """
    try:
        res = ollama.chat(
            model='llava',
            # Deterministic, short answers: temperature 0 / top_k 1 / fixed seed.
            options={
                'temperature': 0,
                'top_k': 1,
                'top_p': 0.1,
                'mirostat_tau': 1.0,
                'num_ctx': 1024,
                'seed': 42,
                'num_predict': 128,
            },
            messages=[
                {
                    'role': 'system',
                    'content': "You are a home surveillance system. Answer with very short sentences."
                },
                {
                    'role': 'user',
                    'content': query,
                    'images': image_list,
                },
            ],
        )
        return res['message']['content']
    except Exception as e:
        # UI boundary: surface the error as text instead of crashing the app.
        return f"Error: {e}"


def get_frame() -> bytes | None:
    """Grab one webcam frame as JPEG bytes, or None if capture/encode fails."""
    with _cap_lock:
        ret, frame = cap.read()
    if not ret:
        return None
    ok, buffer = cv2.imencode('.jpg', frame)
    if not ok:  # encode can fail independently of capture
        return None
    return buffer.tobytes()


def process_image(prompt: str) -> str:
    """Gradio click handler: capture a frame and ask the model *prompt* about it."""
    frame_data = get_frame()
    if frame_data:
        return query_the_image(prompt, [frame_data])
    return "Error capturing image"


def video_feed():
    """Yield successive JPEG-encoded webcam frames until capture fails.

    NOTE(review): kept for compatibility, but it is NOT started in a thread
    anymore — the original ``threading.Thread(target=video_feed)`` only
    created a generator object and never iterated it, so no frames were
    ever produced, and ``gr.Video()`` was never connected to it. A real
    live preview needs e.g. ``gr.Image(streaming=True)`` wired to a frame
    source.
    """
    while True:
        with _cap_lock:
            ret, frame = cap.read()
        if ret:
            yield cv2.imencode('.jpg', frame)[1].tobytes()
        else:
            break


def main() -> None:
    """Build the Gradio UI and run it until shutdown, then release the camera."""
    if not cap.isOpened():
        raise RuntimeError("Could not open webcam (device 0)")
    with gr.Blocks() as gui:
        gr.Markdown("# Live Video AI Assistant")
        with gr.Row():
            # Placeholder video component (see video_feed docstring: the
            # original "live feed" thread was dead code and is removed).
            gr.Video()
        prompt = gr.Textbox(label="Enter your question")
        response = gr.Textbox(label="AI Response")
        btn = gr.Button("Ask")
        btn.click(process_image, inputs=prompt, outputs=response)
    try:
        gui.launch()
    finally:
        cap.release()  # free the webcam even if launch() raises


if __name__ == "__main__":
    main()