# Standard Python imports
import os
import re
import json
from typing import List, Dict, Any

# Data processing and visualization
from PIL import Image
from tqdm import tqdm
# NOTE: this rebinds `tqdm` to the notebook flavour, exactly as in the
# original import order; both imports are kept on purpose.
from tqdm.notebook import tqdm

# Deep Learning & ML
import torch
from transformers import (
    AutoProcessor,
    AutoModelForVision2Seq,
    AutoTokenizer,
    AutoModelForCausalLM,
    TextStreamer,
    Idefics3ForConditionalGeneration,
    BitsAndBytesConfig,
)
from unsloth import FastVisionModel

# Dataset handling
from datasets import load_from_disk

# API & Authentication
from huggingface_hub import login

# UI & Environment
import gradio as gr
from dotenv import load_dotenv

# Available models: UI label -> Hugging Face repository id
MODELS = {
    "Blood Cell Classifier with Llama-3.2": "laurru01/Llama-3.2-11B-Vision-Instruct-ft-PeripherallBloodCells",
    "Blood Cell Classifier with Qwen2-VL": "laurru01/Qwen2-VL-2B-Instruct-ft-bloodcells-big",
    "Blood Cell Classifier with SmolVLM": "laurru01/SmolVLM-Instruct-ft-PeripherallBloodCells",
}

# Base checkpoint the SmolVLM adapter is loaded on top of.
SMOLVLM_BASE_MODEL = "HuggingFaceTB/SmolVLM-Instruct"

# Instruction sent to every model together with the image.
PROMPT = (
    "As a hematologist, carefully identify the type of blood cell in this "
    "image and describe its key characteristics."
)

# Cell types that extract_cell_type() looks for in a generated description.
CELL_TYPES = ('neutrophil', 'lymphocyte', 'monocyte', 'eosinophil', 'basophil')
UNIDENTIFIED_CELL_TYPE = "Unidentified Cell Type"

# Longest edge (in pixels) of the image handed to SmolVLM.
SMOLVLM_MAX_IMAGE_SIZE = 192

# Global dictionary to store loaded models.
# Maps a MODELS key to {"model", "processor", "type"[, "tokenizer"]}.
loaded_models: Dict[str, Dict[str, Any]] = {}


def initialize_models():
    """Preload all models during startup.

    Every entry of ``MODELS`` is loaded and stored in ``loaded_models``.
    A model that fails to load is reported on stdout and skipped, so the
    application can still start with the remaining models.
    """
    print("Initializing models...")
    for model_name, model_path in MODELS.items():
        print(f"Loading {model_name}...")
        try:
            if "SmolVLM" in model_name:
                # SmolVLM-specific loading: base checkpoint plus adapter.
                base_model = Idefics3ForConditionalGeneration.from_pretrained(
                    SMOLVLM_BASE_MODEL,
                    device_map="auto",
                    torch_dtype=torch.bfloat16,
                    load_in_4bit=True,
                    max_memory={0: "12GB"},
                )
                base_model.load_adapter(model_path)
                processor = AutoProcessor.from_pretrained(SMOLVLM_BASE_MODEL)
                loaded_models[model_name] = {
                    "model": base_model,
                    "processor": processor,
                    "type": "smolvlm",
                }
            else:
                # Original loading path for Llama and Qwen (unchanged).
                model, tokenizer = FastVisionModel.from_pretrained(
                    model_name=model_path,
                    load_in_4bit=True,
                    use_gradient_checkpointing="unsloth",
                )
                FastVisionModel.for_inference(model)
                processor = AutoProcessor.from_pretrained(model_path)
                loaded_models[model_name] = {
                    "model": model,
                    "tokenizer": tokenizer,
                    "processor": processor,
                    "type": "standard",
                }
            print(f"Successfully loaded {model_name}")
        except Exception as e:
            # Best effort: one broken model must not prevent the others
            # from loading.
            print(f"Error loading {model_name}: {e}")
    print("Model initialization complete")


def extract_cell_type(text: str) -> str:
    """Extract cell type from generated description.

    Returns the capitalized name of the cell type from ``CELL_TYPES`` that
    is mentioned *earliest* in ``text`` (case-insensitive), or
    ``"Unidentified Cell Type"`` when none is mentioned or ``text`` is empty.

    The earliest mention is used rather than the order of ``CELL_TYPES`` so
    that a description such as "a monocyte, unlike a lymphocyte, ..." is
    reported as a monocyte.
    """
    if not text:
        return UNIDENTIFIED_CELL_TYPE

    text_lower = text.lower()
    mentions = [
        (position, cell_type)
        for cell_type in CELL_TYPES
        if (position := text_lower.find(cell_type)) != -1
    ]
    if not mentions:
        return UNIDENTIFIED_CELL_TYPE
    return min(mentions)[1].capitalize()


def _strip_prompt(raw_output: str) -> str:
    """Drop the echoed prompt from a decoded generation, when possible.

    When the text contains "The provided image", everything before the first
    "assistant" marker is removed (the marker itself is kept). If the marker
    is absent the text is returned unchanged instead of being sliced with
    ``find()``'s ``-1`` result, which would keep only the last character.
    """
    if "The provided image" in raw_output:
        start_idx = raw_output.find("assistant")
        if start_idx != -1:
            return raw_output[start_idx:].strip()
    return raw_output.strip()


@torch.no_grad()
def generate_description_standard(model, tokenizer, image):
    """Generate description using standard models (Llama and Qwen).

    Args:
        model: Model returned by ``FastVisionModel.from_pretrained``.
        tokenizer: The object returned alongside the model; it is used both
            to build the chat prompt and to encode ``image`` with the text.
        image: PIL image to describe.

    Returns:
        The decoded generation with surrounding whitespace stripped and,
        when recognizable, the echoed prompt removed.
    """
    messages = [{
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": PROMPT},
        ],
    }]
    input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
    inputs = tokenizer(
        image,
        input_text,
        add_special_tokens=False,
        return_tensors="pt",
    ).to("cuda")

    # Streams the generated tokens to stdout while generating.
    text_streamer = TextStreamer(tokenizer, skip_prompt=True)
    output = model.generate(
        **inputs,
        streamer=text_streamer,
        max_new_tokens=1024,
        use_cache=True,
        temperature=1.5,
        min_p=0.1,
    )

    raw_output = tokenizer.decode(output[0], skip_special_tokens=True)
    return _strip_prompt(raw_output)


@torch.no_grad()
def generate_description_smolvlm(model, processor, image):
    """Generate description using SmolVLM model with memory-efficient settings.

    Args:
        model: SmolVLM model with the fine-tuned adapter loaded.
        processor: Processor matching the SmolVLM base checkpoint.
        image: PIL image to describe. It is not modified; a downscaled RGB
            copy is what the model sees.

    Returns:
        The generated description, or an error message when the output has
        fewer than five distinct words.

    Raises:
        Exception: Whatever the processor or ``model.generate`` raises; the
            CUDA cache is emptied before the exception propagates.
    """
    # convert() already returns a new image; copy() in the RGB case keeps
    # the in-place thumbnail() below from shrinking the caller's image.
    image = image.convert("RGB") if image.mode != "RGB" else image.copy()

    # Downscale to a smaller size to reduce memory use.
    image.thumbnail(
        (SMOLVLM_MAX_IMAGE_SIZE, SMOLVLM_MAX_IMAGE_SIZE),
        Image.Resampling.LANCZOS,
    )

    sample = [{
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": PROMPT},
        ],
    }]
    text_input = processor.apply_chat_template(sample, add_generation_prompt=True)

    torch.cuda.empty_cache()
    try:
        with torch.autocast(device_type="cuda"):
            model_inputs = processor(
                text=text_input,
                images=[[image]],
                return_tensors="pt",
            ).to("cuda")

            generated_ids = model.generate(
                **model_inputs,
                max_new_tokens=256,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                repetition_penalty=1.5,
                no_repeat_ngram_size=3,
                num_beams=2,
                length_penalty=1.0,
                early_stopping=True,
                use_cache=True,
                pad_token_id=processor.tokenizer.pad_token_id,
            )

        # Keep only the tokens generated after the prompt.
        response_ids = generated_ids[0][len(model_inputs.input_ids[0]):]
        output_text = processor.decode(
            response_ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        ).strip()

        # Fewer than five distinct words is treated as a degenerate answer.
        if len(set(output_text.split())) < 5:
            output_text = "Error: Generated response was too repetitive. Please try again."

        # Drop the tensor references so the cache flush below can free them.
        del model_inputs, generated_ids, response_ids
        return output_text
    finally:
        # Runs on success and on failure alike.
        torch.cuda.empty_cache()


def analyze_cell(image, model_name):
    """Main function to analyze cell images.

    Args:
        image: Uploaded image; anything that is not a PIL image is rejected.
        model_name: Key of ``MODELS`` selected in the UI.

    Returns:
        A ``(cell_type, description, image)`` tuple for the three output
        components. On failure the first element carries the error message,
        the description is empty and the image is ``None``.
    """
    if not isinstance(image, Image.Image):
        return "Invalid image format. Please upload a valid image.", "", None

    try:
        if model_name not in loaded_models:
            return f"Model {model_name} not loaded.", "", None

        model_components = loaded_models[model_name]
        if model_components["type"] == "smolvlm":
            description = generate_description_smolvlm(
                model_components["model"],
                model_components["processor"],
                image,
            )
        else:
            description = generate_description_standard(
                model_components["model"],
                model_components["tokenizer"],
                image,
            )

        cell_type = extract_cell_type(description)
        return cell_type, description, image
    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing
        # the request.
        return f"Error occurred: {e}", "", None


# Initialize all models before starting the interface
initialize_models()

# Enhanced CSS with modern color scheme
custom_css = """
.container {
    max-width: 1000px;
    margin: auto;
    padding: 30px;
    background: linear-gradient(135deg, #f6f9fc 0%, #ffffff 100%);
    border-radius: 20px;
    box-shadow: 0 10px 20px rgba(0,0,0,0.05);
}
.title {
    text-align: center;
    color: #2d3436;
    font-size: 3em;
    font-weight: 700;
    margin-bottom: 20px;
    text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
}
.subtitle {
    text-align: center;
    color: #636e72;
    font-size: 1.2em;
    margin-bottom: 40px;
}
.input-image {
    border: 2px dashed #74b9ff;
    border-radius: 15px;
    padding: 20px;
    transition: all 0.3s ease;
}
.input-image:hover {
    border-color: #0984e3;
    transform: translateY(-2px);
}
.model-dropdown {
    background: #f8f9fa;
    border-radius: 10px;
    border: 1px solid #dfe6e9;
    margin: 15px 0;
}
.submit-button {
    background: linear-gradient(45deg, #0984e3, #74b9ff);
    color: white;
    border: none;
    padding: 12px 25px;
    border-radius: 10px;
    font-weight: 600;
    transition: all 0.3s ease;
}
.submit-button:hover {
    transform: translateY(-2px);
    box-shadow: 0 5px 15px rgba(9, 132, 227, 0.3);
}
.result-box {
    background: white;
    border-radius: 10px;
    border: 1px solid #dfe6e9;
    padding: 15px;
    margin: 10px 0;
}
.output-image {
    border-radius: 15px;
    overflow: hidden;
    box-shadow: 0 5px 15px rgba(0,0,0,0.1);
}
"""

# Interface
# An earlier, unstyled gr.Blocks() that was bound to the same `iface` name
# and never launched has been dropped; only this interface is served.
#
# NOTE(review): the markup inside the gr.HTML() strings did not survive in
# the source (only the visible text did). The tags below are a minimal
# reconstruction using the .title / .subtitle classes defined in custom_css
# -- confirm against the deployed app.
with gr.Blocks(css=custom_css) as iface:
    gr.HTML("<h1 class='title'>Blood Cell Classifier</h1>")
    gr.HTML(
        "<p class='subtitle'>Upload a microscopic blood cell image for "
        "instant classification and detailed analysis</p>"
    )

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(
                label="Upload Blood Cell Image",
                type="pil",
                sources=["upload"],  # Only allow computer uploads
                elem_classes="input-image",
            )
            model_dropdown = gr.Dropdown(
                choices=list(MODELS.keys()),
                value=list(MODELS.keys())[0],
                label="Select Model Version",
                elem_classes="model-dropdown",
            )
            submit_btn = gr.Button(
                "Analyze Cell",
                variant="primary",
                elem_classes="submit-button",
            )

        with gr.Column():
            cell_type = gr.Textbox(
                label="Identified Cell Type",
                elem_classes="result-box",
            )
            description = gr.Textbox(
                label="Analysis Details",
                lines=8,
                elem_classes="result-box",
            )
            output_image = gr.Image(
                label="Analyzed Image",
                elem_classes="output-image",
            )

    submit_btn.click(
        fn=analyze_cell,
        inputs=[input_image, model_dropdown],
        outputs=[cell_type, description, output_image],
    )

    # TODO(review): "View on GitHub" was a link whose target is not
    # recoverable from the source; restore the <a href="..."> once known.
    gr.HTML("""
<p>Developed by Laura Ruiz | MSc Bioinformatics and Biostatistics</p>
<p>View on GitHub</p>
""")

# Launch the interface
iface.launch()