IDEFICS3_ROCO

Running

IDEFICS3_ROCO / app.py

æLtorio

add descriptions

1d6cff4 unverified 3 days ago

4.16 kB

	# Copyright 2024 Ronan Le Meillat
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	# http://www.apache.org/licenses/LICENSE-2.0
	# Import necessary libraries
	import gradio as gr
	from transformers import AutoProcessor, Idefics3ForConditionalGeneration, image_utils
	import torch

	# Select the compute device: CUDA when torch reports it as available, otherwise the CPU
	device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
	print(f"Using device: {device}") # Report the selected device at startup

	# Repository identifiers: the adapter to load and the base checkpoint it is applied to
	model_id = "eltorio/IDEFICS3_ROCO"
	base_model_path = "HuggingFaceM4/Idefics3-8B-Llama3" # or change to local path

	# The processor is created from the base checkpoint, not from the adapter repository
	processor = AutoProcessor.from_pretrained(base_model_path, trust_remote_code=True)

	# Load the base weights with a bfloat16 torch dtype ...
	model = Idefics3ForConditionalGeneration.from_pretrained(
	    base_model_path,
	    torch_dtype=torch.bfloat16,
	)
	# ... then place the model on the selected device
	model = model.to(device)

	# Attach the adapter stored under model_id, letting device_map="auto" place it
	model.load_adapter(model_id, device_map="auto")

	# Define a function to infer a description from an image
	def infere(image):
	    """
	    Generate a description of a medical image.

	    Args:
	    - image: The medical image to describe, in whatever form the Gradio image
	      input delivers it (it is passed unchanged to the processor).

	    Returns:
	    - generated_texts (List[str]): A list containing the generated description.
	      Only the newly generated text is returned; the prompt is not echoed back.

	    Raises:
	    - gr.Error: If no image was provided.
	    """

	    # The image input may deliver None when nothing was uploaded; fail with a
	    # readable message instead of an obscure error from inside the processor
	    if image is None:
	        raise gr.Error("Please provide an image to describe.")

	    # Define a chat template for the model to respond to
	    messages = [
	        {
	            "role": "system",
	            "content": [
	                {"type": "text", "text": "You are a valuable medical doctor and you are looking at an image of your patient."},
	            ]
	        },
	        {
	            "role": "user",
	            "content": [
	                {"type": "image"},
	                {"type": "text", "text": "What do we see in this image?"},
	            ]
	        },
	    ]

	    # Apply the chat template and add a generation prompt
	    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)

	    # Preprocess the input image and text
	    inputs = processor(text=prompt, images=[image], return_tensors="pt")

	    # Move the inputs to the specified device
	    inputs = {k: v.to(device) for k, v in inputs.items()}

	    # Generate a description with the model
	    generated_ids = model.generate(**inputs, max_new_tokens=100)

	    # generate() returns the prompt tokens followed by the new tokens; drop the
	    # prompt part so the system/user messages are not decoded into the answer
	    prompt_length = inputs["input_ids"].shape[1]
	    new_token_ids = generated_ids[:, prompt_length:]

	    # Decode the generated IDs into text
	    generated_texts = processor.batch_decode(new_token_ids, skip_special_tokens=True)

	    # Trim surrounding whitespace left over from decoding
	    return [text.strip() for text in generated_texts]

	# Page heading (links to the model repository and names the device) and short tagline
	title = f"<a href='https://huggingface.co/eltorio/IDEFICS3_ROCO'>IDEFICS3_ROCO</a>: Medical Image to Text <b>running on {device}</b>"
	desc = "This model generates a description of a medical image."

	# Device notice: a short confirmation on CUDA, otherwise a warning about slowness
	if device == torch.device('cuda'):
	    device_desc = f"This model is running on {device} 🚀."
	else:
	    device_desc = f"🐢 This model is running on {device} it will be very (very) slow. If you can donate some GPU time it will be usable 🐢. <a href='https://huggingface.co/eltorio/IDEFICS3_ROCO/discussions'>Please contact us.</a>"

	# Article text for the interface; it embeds the device notice built above
	long_desc = f"This model is based on the <a href='https://huggingface.co/eltorio/IDEFICS3_ROCO'>IDEFICS3_ROCO model</a>, which is a multimodal model that can generate text from images. It has been fine-tuned on <a href='https://huggingface.co/datasets/eltorio/ROCO-radiology'>eltorio/ROCO-radiology</a> a dataset of medical images and can generate descriptions of medical images. Try uploading an image of a medical image and see what the model generates!<br><b>{device_desc}</b><br> 2024 - Ronan Le Meillat"

	# infere returns a list of strings while the "text" output displays a single
	# string; join the list so the page shows plain text rather than a list repr
	def _describe_image(image):
	    """Run infere on the image and return its texts joined into one string."""
	    return "\n".join(infere(image))

	# Create a Gradio interface around the inference wrapper with the title and descriptions
	radiotest = gr.Interface(fn=_describe_image, inputs="image", outputs="text", title=title,
	                         description=desc, article=long_desc)

	# Launch the Gradio interface and share it
	radiotest.launch(share=True)