Spaces:

annie08
/

caption-generator-transformer-for-vision-language

Sleeping

fixes

95c8975 7 months ago

1.36 kB

	import torch
	from transformers import AutoProcessor, AutoModelForVision2Seq
	from PIL import Image
	import gradio as gr

	# Directory that holds the saved processor and model files
	saved_folder_path = "model_folder"  # Replace with the path to your model folder

	# Load the processor, then load the vision-to-sequence model and switch it to
	# evaluation mode in the same expression (Module.eval() returns the module).
	processor = AutoProcessor.from_pretrained(saved_folder_path)
	model = AutoModelForVision2Seq.from_pretrained(saved_folder_path).eval()

	# Define the caption generation function
	def generate_caption(image):
	    """Generate a text caption for an input image.

	    Args:
	        image: The value delivered by the Gradio image input. May be an
	            array accepted by ``Image.fromarray``, an existing PIL image,
	            or ``None`` when no image is present.

	    Returns:
	        The decoded caption string, or an empty string when ``image`` is
	        ``None``.
	    """
	    # The interface runs live, so this callback may be invoked with no image
	    # (e.g. after the input is cleared). Return an empty caption rather than
	    # letting Image.fromarray raise on None.
	    if image is None:
	        return ""

	    # Convert to PIL only when necessary; a PIL image is used unchanged.
	    if not isinstance(image, Image.Image):
	        image = Image.fromarray(image)

	    # Preprocess the image into model-ready tensors
	    inputs = processor(images=image, return_tensors="pt")
	    pixel_values = inputs.pixel_values

	    # Generate token ids (sequence capped at 50) and decode the first result
	    generated_ids = model.generate(pixel_values=pixel_values, max_length=50)
	    generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

	    return generated_caption

	# Build the Gradio UI: an image input is passed to generate_caption and the
	# returned caption is shown in a textbox. live=True enables live prediction.
	interface = gr.Interface(generate_caption, gr.Image(), gr.Textbox(), live=True)

	# Start the Gradio app
	interface.launch()