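"""Live Video AI Assistant.

A Gradio app that captures webcam frames with OpenCV and answers questions
about the current frame using the LLaVA vision model served by Ollama.
"""
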
import gradio as gr
import cv2
import ollama
import time

# Initialize the webcam (device 0 is the default camera)
cap = cv2.VideoCapture(0)

def query_the_image(query: str, image_list: list[bytes]):
    """Send the prompt and one or more JPEG-encoded frames to the LLaVA model via Ollama."""
    try:
        res = ollama.chat(
            model='llava',
            options={
                'temperature': 0,
                'top_k': 1,
                'top_p': 0.1,
                'mirostat_tau': 1.0,
                'num_ctx': 1024,
                'seed': 42,
                'num_predict': 128
            },
            messages=[
                {
                    'role': 'system',
                    'content': "You are a home surveillance system. Answer with very short sentences."
                },
                {
                    'role': 'user',
                    'content': query,
                    'images': image_list,
                }
            ]
        )
        return res['message']['content']
    except Exception as e:
        return f"Error: {e}"

def get_frame():
    """Grab a single frame from the webcam and return it as JPEG bytes."""
    ret, frame = cap.read()
    if not ret:
        return None
    _, buffer = cv2.imencode('.jpg', frame)
    return buffer.tobytes()

def process_image(prompt):
    """Capture the current frame and ask the model about it."""
    frame_data = get_frame()
    if frame_data:
        return query_the_image(prompt, [frame_data])
    return "Error capturing image"

def video_feed():
    """Stream webcam frames to the UI as RGB arrays, roughly ten per second."""
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # OpenCV captures in BGR order; Gradio expects RGB.
        yield cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        time.sleep(0.1)

gui = gr.Blocks()
with gui:
    gr.Markdown("# Live Video AI Assistant")
    with gr.Row():
        # The live feed is rendered as a continuously updated image.
        video_component = gr.Image(label="Live Feed")
    prompt = gr.Textbox(label="Enter your question")
    response = gr.Textbox(label="AI Response")
    btn = gr.Button("Ask")
    btn.click(process_image, inputs=prompt, outputs=response)
    # Start streaming frames into the image component when the page loads.
    gui.load(video_feed, inputs=None, outputs=video_component)

gui.queue().launch()