shorndrup committed on
Commit
fa75e04
1 Parent(s): 06bfaef

Initial commit

Browse files
Files changed (2) hide show
  1. app.py +37 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import OwlViTProcessor, OwlViTForObjectDetection
2
+ from PIL import Image
3
+ import torch
4
+ import gradio as gr
5
+
6
# Load the zero-shot object-detection model and its processor once at module
# import time (downloads weights on first run; reused by every predict() call).
# NOTE(review): names `processor` and `model` are read by predict() below —
# they are part of the module's internal interface, do not rename.
processor = OwlViTProcessor.from_pretrained("google/owlvit-base-patch32")
model = OwlViTForObjectDetection.from_pretrained("google/owlvit-base-patch32")
9
+
10
def predict(image):
    """Run zero-shot object detection on an image with fixed text queries.

    Args:
        image: Either a path/file-like object openable by PIL, or an
            already-loaded ``PIL.Image.Image`` (Gradio may supply either
            depending on the input component's ``type`` setting).

    Returns:
        list[dict]: One dict per detection above the confidence threshold,
        with keys ``score`` (float, 3 dp), ``label`` (the matched text
        query) and ``box`` (``[x0, y0, x1, y1]`` floats, 2 dp).
    """
    # Accept a PIL image directly; otherwise treat the argument as a
    # path/file and open it. Always normalize to RGB for the model.
    if not isinstance(image, Image.Image):
        image = Image.open(image)
    image = image.convert("RGB")

    # Zero-shot queries: the model scores boxes against these phrases.
    text_queries = ["A Pokémon", "Pikachu", "Bulbasaur"]

    # Run the model without tracking gradients (inference only).
    inputs = processor(text=text_queries, images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # image.size is (width, height); post-processing expects (height, width).
    target_sizes = torch.tensor([image.size[::-1]])
    results = processor.post_process_object_detection(
        outputs, target_sizes=target_sizes, threshold=0.1
    )

    # Collect detections for the single image in the batch.
    boxes = []
    for score, label, box in zip(
        results[0]["scores"], results[0]["labels"], results[0]["boxes"]
    ):
        coords = [round(c, 2) for c in box.tolist()]
        # BUG FIX: `label` is an index into `text_queries`, not a token id.
        # The original `processor.tokenizer.decode([label])` decoded token id
        # 0/1/2 from the vocabulary, producing a meaningless label string.
        label_text = text_queries[label]
        boxes.append(
            {"score": round(score.item(), 3), "label": label_text, "box": coords}
        )

    return boxes
34
+
35
# Create the Gradio interface: a single image input wired to predict(),
# with the returned list of detection dicts rendered as JSON.
interface = gr.Interface(fn=predict, inputs="image", outputs="json")
# Start the web server (blocks; this is the script's entry point side effect).
interface.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ transformers
2
+ torch
3
+ pillow
4
+ gradio