Spaces:

shorndrup
/

owlvit_api

Running

owlvit_api / app.py

2d88cb5 1 day ago

1.14 kB

	import gradio as gr
	from transformers import OwlViTProcessor, OwlViTForObjectDetection
	from PIL import Image

	# The processor (text/image preprocessing) and the detection model are both
	# loaded from the same OWL-ViT checkpoint, so the identifier is spelled once.
	_OWLVIT_CHECKPOINT = "google/owlvit-base-patch32"
	processor = OwlViTProcessor.from_pretrained(_OWLVIT_CHECKPOINT)
	model = OwlViTForObjectDetection.from_pretrained(_OWLVIT_CHECKPOINT)

	def _to_rgb_image(image):
	    """Coerce the supported input kinds into an RGB ``PIL.Image.Image``."""
	    if isinstance(image, Image.Image):
	        return image.convert("RGB")
	    if isinstance(image, str) or hasattr(image, "__fspath__") or hasattr(image, "read"):
	        # Path or file object: convert() forces the pixel data to load, so
	        # the underlying file can be closed as soon as the block exits.
	        with Image.open(image) as opened:
	            return opened.convert("RGB")
	    # Anything else is treated as array-like pixel data (e.g. a NumPy array).
	    return Image.fromarray(image).convert("RGB")


	# Define the prediction function
	def predict(image):
	    """Run OWL-ViT on one image against a fixed list of text queries.

	    Args:
	        image: The image to analyse. May be a ``PIL.Image.Image``, a file
	            path or binary file object accepted by ``Image.open``, or an
	            array accepted by ``Image.fromarray``. Gradio's ``"image"``
	            input shortcut delivers a NumPy array by default, which the
	            previous ``Image.open(image)`` call could not handle.
	            ``None`` means no image was supplied.

	    Returns:
	        A JSON-serialisable dict with a single ``"message"`` key.
	    """
	    # An empty or cleared image input arrives as None (likely when the
	    # interface runs in live mode); answer gracefully instead of crashing.
	    if image is None:
	        return {"message": "No image provided."}

	    # Local import keeps this block self-contained. torch is already a hard
	    # requirement here because the processor is asked for "pt" tensors.
	    import torch

	    text_queries = ["A photo of a pokemon", "a photo of a human face", "a photo of a couch"]  # Example queries

	    # Prepare inputs for the model
	    inputs = processor(
	        text=text_queries,
	        images=_to_rgb_image(image),
	        return_tensors="pt",
	    )

	    # Perform inference; gradients are never needed when serving predictions.
	    with torch.no_grad():
	        # NOTE: the raw model outputs are not post-processed yet, so they are
	        # intentionally discarded and a placeholder response is returned.
	        model(**inputs)

	    # Format the response (dummy response as example)
	    return {"message": "Detection successful!"}

	# Build the Gradio UI around predict(): one image input, JSON output, and
	# the flagging button disabled.
	interface = gr.Interface(
	    predict,
	    "image",
	    "json",
	    # Per the Gradio docs, live mode re-runs the function automatically
	    # whenever an input changes; it is unrelated to keeping the API alive.
	    live=True,
	    allow_flagging="never",
	)

	# Serve on every network interface at port 7860 and request a public share
	# link from Gradio.
	interface.launch(share=True, server_port=7860, server_name="0.0.0.0")