import gradio as gr
import torch
from transformers import OwlViTProcessor, OwlViTForObjectDetection
from PIL import Image

# Load the OWL-ViT model and processor
processor = OwlViTProcessor.from_pretrained("google/owlvit-base-patch32")
model = OwlViTForObjectDetection.from_pretrained("google/owlvit-base-patch32")
# Define the prediction function
def predict(image):
    image = Image.open(image).convert("RGB")
    text_queries = ["a photo of a pokemon", "a photo of a human face", "a photo of a couch"]  # Example queries

    # Prepare inputs for the model
    inputs = processor(text=text_queries, images=image, return_tensors="pt")

    # Perform inference without tracking gradients
    with torch.no_grad():
        outputs = model(**inputs)

    # Convert the raw outputs into boxes, scores, and labels scaled to the original image size
    target_sizes = torch.tensor([image.size[::-1]])
    results = processor.post_process_object_detection(outputs=outputs, target_sizes=target_sizes, threshold=0.1)[0]

    # Format the response as a JSON-serializable dict
    detections = [
        {"label": text_queries[int(label)], "score": round(score.item(), 3), "box": box.tolist()}
        for score, label, box in zip(results["scores"], results["labels"], results["boxes"])
    ]
    return {"detections": detections}
# Create a Gradio interface (a JSON API endpoint is exposed automatically)
interface = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="filepath"),  # Pass the uploaded image to predict() as a file path
    outputs="json",
    allow_flagging="never",
    live=True  # Run the prediction automatically whenever the input image changes
)
# Launch the interface; share=True also creates a temporary public link
interface.launch(server_name="0.0.0.0", server_port=7860, share=True)
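
Once the app is running, the same /predict endpoint can also be called programmatically. Below is a minimal sketch using the gradio_client package; the server URL and image path are placeholders, and handle_file assumes a recent gradio_client release (older versions accept a plain file path instead).

from gradio_client import Client, handle_file

# Point the client at the running app (placeholder URL; substitute your Space URL if deployed there)
client = Client("http://localhost:7860")

# Call the /predict endpoint with a local image file (placeholder path)
result = client.predict(handle_file("example.jpg"), api_name="/predict")
print(result)  # JSON dict with detected labels, scores, and boxes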