"""Live Video AI Assistant.

Captures frames from the local webcam and asks a local LLaVA model
(served by Ollama) questions about the current frame through a Gradio UI.
"""

import threading

import cv2
import gradio as gr
import ollama

# Single shared capture device. Guarded by a lock because Gradio handlers
# (and any future frame readers) may run on different threads, and
# cv2.VideoCapture is not thread-safe.
cap = cv2.VideoCapture(0)
_cap_lock = threading.Lock()


def query_the_image(query: str, image_list: list[bytes]) -> str:
    """Send *query* plus JPEG-encoded frames to the local LLaVA model.

    Args:
        query: The user's question about the image(s).
        image_list: JPEG-encoded frames (raw bytes, as produced by get_frame).

    Returns:
        The model's text answer, or an ``"Error: ..."`` string on failure.
    """
    try:
        res = ollama.chat(
            model='llava',
            # Deterministic, short answers: temperature 0 / top_k 1 / fixed seed.
            options={
                'temperature': 0,
                'top_k': 1,
                'top_p': 0.1,
                'mirostat_tau': 1.0,
                'num_ctx': 1024,
                'seed': 42,
                'num_predict': 128,
            },
            messages=[
                {
                    'role': 'system',
                    'content': "You are a home surveillance system. Answer with very short sentences."
                },
                {
                    'role': 'user',
                    'content': query,
                    'images': image_list,
                },
            ],
        )
        return res['message']['content']
    except Exception as e:
        # UI boundary: surface the error as text instead of crashing the app.
        return f"Error: {e}"


def get_frame() -> bytes | None:
    """Grab one webcam frame as JPEG bytes, or None if capture/encode fails."""
    with _cap_lock:
        ret, frame = cap.read()
    if not ret:
        return None
    ok, buffer = cv2.imencode('.jpg', frame)
    if not ok:  # encode can fail independently of capture
        return None
    return buffer.tobytes()


def process_image(prompt: str) -> str:
    """Gradio click handler: capture a frame and ask the model *prompt* about it."""
    frame_data = get_frame()
    if frame_data:
        return query_the_image(prompt, [frame_data])
    return "Error capturing image"


def video_feed():
    """Yield successive JPEG-encoded webcam frames until capture fails.

    NOTE(review): kept for compatibility, but it is NOT started in a thread
    anymore — the original ``threading.Thread(target=video_feed)`` only
    created a generator object and never iterated it, so no frames were
    ever produced, and ``gr.Video()`` was never connected to it. A real
    live preview needs e.g. ``gr.Image(streaming=True)`` wired to a frame
    source.
    """
    while True:
        with _cap_lock:
            ret, frame = cap.read()
        if ret:
            yield cv2.imencode('.jpg', frame)[1].tobytes()
        else:
            break


def main() -> None:
    """Build the Gradio UI and run it until shutdown, then release the camera."""
    if not cap.isOpened():
        raise RuntimeError("Could not open webcam (device 0)")
    with gr.Blocks() as gui:
        gr.Markdown("# Live Video AI Assistant")
        with gr.Row():
            # Placeholder video component (see video_feed docstring: the
            # original "live feed" thread was dead code and is removed).
            gr.Video()
        prompt = gr.Textbox(label="Enter your question")
        response = gr.Textbox(label="AI Response")
        btn = gr.Button("Ask")
        btn.click(process_image, inputs=prompt, outputs=response)
    try:
        gui.launch()
    finally:
        cap.release()  # free the webcam even if launch() raises


if __name__ == "__main__":
    main()