"""Gradio demo that captions throat images with a PEFT-adapted BLIP-2 model."""

import torch
import gradio as gr
from transformers import (
    AutoProcessor,
    BitsAndBytesConfig,
    Blip2ForConditionalGeneration,
)
from peft import PeftConfig, PeftModel

# Token id passed to ``generate`` as the end-of-sequence marker.
# NOTE(review): presumably chosen to match how the adapters were trained --
# confirm against the training script before changing it.
EOS_TOKEN_ID = 13

# Value forwarded to ``generate`` as ``max_length``.
MAX_LENGTH = 100

# Load the PEFT adapter configuration.
peft_model_id = "Prasi21/blip2-opt-2.7b-strep-throat-caption-adapters3"
config = PeftConfig.from_pretrained(peft_model_id)
# Override the base-model path recorded in the adapter config so the base
# weights are fetched from the same repository as the adapters.
# NOTE(review): this assumes the adapter repo also hosts loadable base
# weights -- confirm.
config.base_model_name_or_path = (
    "Prasi21/blip2-opt-2.7b-strep-throat-caption-adapters3"
)

# Enable 8-bit quantization for more memory-efficient loading.
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

# Load the base model with quantization; ``device_map="auto"`` lets the
# library decide where the weights are placed.
model = Blip2ForConditionalGeneration.from_pretrained(
    config.base_model_name_or_path,
    quantization_config=quantization_config,
    device_map="auto",
)

# Attach the fine-tuned PEFT adapters to the base model.
model = PeftModel.from_pretrained(model, peft_model_id)

# Load the processor used to turn images into model inputs and to decode
# generated token ids back into text.
processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")


def predict(image):
    """Generate a caption for *image* with the fine-tuned BLIP-2 model.

    Args:
        image: The uploaded image as delivered by ``gr.Image(type="pil")``,
            or ``None`` when the user submitted without uploading anything.

    Returns:
        The first decoded caption, with special tokens removed.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Surface a readable message in the UI instead of a processor crash.
        raise gr.Error("Please upload an image.")

    # Move the inputs to the model's own device. The original code referenced
    # an undefined global ``device``, which raised NameError on every call.
    inputs = processor(images=image, return_tensors="pt").to(
        model.device, torch.float16
    )

    with torch.no_grad():
        generated_ids = model.generate(
            **inputs,
            max_length=MAX_LENGTH,
            eos_token_id=EOS_TOKEN_ID,
        )

    captions = processor.batch_decode(generated_ids, skip_special_tokens=True)
    return captions[0]


# Set up the Gradio interface.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),  # Upload an image in PIL format
    outputs=gr.Textbox(),  # The output will be the generated caption
    title="Strep Throat Image Assessment",
    description="Upload an image of a throat and receive a medical assessment caption based on the model's output.",
)

# Launch the Gradio app.
demo.launch()