"""Gradio demo that captions an uploaded image with a locally saved model.

The processor and model are loaded once at import time from
``saved_folder_path``; the Gradio interface then calls ``generate_caption``
for every uploaded image.
"""

import torch
from transformers import AutoProcessor, AutoModelForVision2Seq
from PIL import Image
import gradio as gr

# Folder where the model and processor are saved.
saved_folder_path = "model_folder"  # Replace with the path to your model folder

# Load the processor and the model once, at start-up, so every request
# reuses the same objects.
processor = AutoProcessor.from_pretrained(saved_folder_path)
# The original note describes this checkpoint as a pre-trained BLIP model;
# the actual architecture depends on what is stored in the folder.
model = AutoModelForVision2Seq.from_pretrained(saved_folder_path)
model.eval()  # Set model to evaluation mode


def generate_caption(image):
    """Return a text caption for ``image``.

    Args:
        image: The image handed over by the Gradio ``Image`` component.
            An array accepted by ``PIL.Image.fromarray`` or an existing
            ``PIL.Image.Image``. ``None`` is accepted and yields an empty
            caption.

    Returns:
        The first decoded caption as a string, or ``""`` when no image
        was supplied.
    """
    # With ``live=True`` the callback also fires when the image is cleared,
    # in which case there is nothing to caption.
    if image is None:
        return ""

    # Convert array input to PIL; leave PIL images untouched.
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    # Preprocess the image into the tensors the model expects.
    inputs = processor(images=image, return_tensors="pt")
    pixel_values = inputs.pixel_values

    # Generate token ids (capped at 50 tokens) and decode them to text.
    generated_ids = model.generate(pixel_values=pixel_values, max_length=50)
    captions = processor.batch_decode(generated_ids, skip_special_tokens=True)
    return captions[0]


# # Examples for testing
# examples = [
#     ["example_images/image1.jpg"],  # Replace with paths to example images
#     ["example_images/image2.jpg"]
# ]

# Define the Gradio interface.
interface = gr.Interface(
    fn=generate_caption,  # Function to process input
    inputs=gr.Image(label="Upload an Image"),
    outputs=gr.Textbox(label="Generated Caption", lines=2),
    # examples=examples,  # Add example images
    live=True,  # Re-run the function whenever the input changes
    title="📸 Image Caption Generator",
    # NOTE(review): "an chest" is a typo in the user-facing text; left
    # unchanged here so the displayed string stays identical.
    description=(
        "Upload an chest x-ray image to generate a descriptive caption "
        "using our AI model. Built with Transformers and Gradio."
    ),
    # Theme given as an "owner/name" identifier rather than a built-in
    # theme name -- presumably resolved from the Hugging Face Hub.
    theme="allenai/gradio-theme",
    # Custom CSS for output styling.
    css=".output { font-size: 16px; padding: 10px; border: 1px solid #ccc; border-radius: 5px; }",
)

# Launch the Gradio app.
interface.launch()