File size: 1,845 Bytes
5fd9575
 
 
 
 
 
5b84e6f
5fd9575
 
 
 
 
 
 
5b84e6f
5fd9575
 
 
 
 
 
 
 
 
5b84e6f
5fd9575
 
 
 
 
 
5b84e6f
5fd9575
 
 
 
 
5b84e6f
5fd9575
 
 
 
 
 
 
 
 
 
 
 
 
 
5e6f79d
5fd9575
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import io
import threading

import cv2
import gradio as gr
import spaces
import torch
from PIL import Image
from transformers import (
    BlipForConditionalGeneration,
    BlipForQuestionAnswering,
    BlipProcessor,
)

# Initialize the webcam (capture device index 0).
cap = cv2.VideoCapture(0)

# Load the Hugging Face model and processor.
# The checkpoint is a visual-question-answering model, so it is loaded with
# the question-answering class; the captioning class would ignore the
# question-encoder weights stored in the checkpoint.
_CHECKPOINT = "Salesforce/blip-vqa-base"
_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
processor = BlipProcessor.from_pretrained(_CHECKPOINT)
model = BlipForQuestionAnswering.from_pretrained(_CHECKPOINT).to(_DEVICE)
@spaces.GPU
def query_the_image(query: str, image_data: bytes) -> str:
    """Run the module-level BLIP model on an encoded image and a text query.

    Args:
        query: Text passed to the processor together with the image.
        image_data: Encoded image bytes (anything PIL can open).

    Returns:
        The decoded model output, or a string of the form ``"Error: ..."``
        when anything fails. Errors are returned instead of raised so the
        caller can show them directly in the response textbox.
    """
    try:
        # Close the underlying file handle promptly; convert() returns an
        # independent RGB copy that stays valid after the `with` block.
        with Image.open(io.BytesIO(image_data)) as encoded:
            image = encoded.convert("RGB")
        inputs = processor(image, query, return_tensors="pt").to(model.device)
        output = model.generate(**inputs)
        return processor.decode(output[0], skip_special_tokens=True)
    except Exception as e:
        # Deliberately broad: this is the UI boundary and the message is
        # surfaced to the user rather than crashing the request.
        return f"Error: {e}"
@spaces.GPU
def get_frame():
    """Grab one frame from the webcam and return it as JPEG bytes.

    Returns:
        The JPEG-encoded frame as ``bytes``, or ``None`` when either the
        capture or the JPEG encoding fails.
    """
    ret, frame = cap.read()
    if not ret:
        return None
    # imencode reports success separately from the buffer; do not hand back
    # a buffer from a failed encode.
    encoded_ok, buffer = cv2.imencode('.jpg', frame)
    if not encoded_ok:
        return None
    return buffer.tobytes()
@spaces.GPU
def process_image(prompt):
    """Capture the current webcam frame and ask the model about it.

    Returns the answer from ``query_the_image`` for ``prompt``, or the fixed
    string ``"Error capturing image"`` when no frame data is available.
    """
    snapshot = get_frame()
    if not snapshot:
        return "Error capturing image"
    return query_the_image(prompt, snapshot)
@spaces.GPU
def video_feed():
    """Yield successive webcam frames as JPEG bytes.

    The generator finishes as soon as a frame cannot be read from ``cap``.
    """
    while True:
        grabbed, image = cap.read()
        if not grabbed:
            return
        _, jpeg = cv2.imencode('.jpg', image)
        yield jpeg.tobytes()

# Build the UI: a video placeholder, a prompt box, a response box and a
# button that sends the prompt plus the current webcam frame to the model.
with gr.Blocks() as gui:
    gr.Markdown("# Live Video AI Assistant")
    with gr.Row():
        # NOTE(review): nothing feeds this component yet. Starting a thread
        # with target=video_feed does not help: video_feed is a generator
        # function, so calling it only creates a generator object and the
        # thread exits without reading a single frame. That no-op thread is
        # therefore not started here.
        video_component = gr.Video()
    prompt = gr.Textbox(label="Enter your safety policy for the AI to analyse each frame in real time")
    response = gr.Textbox(label="AI Response")
    btn = gr.Button("Ask")
    btn.click(process_image, inputs=prompt, outputs=response)

gui.launch()