import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig
import gradio as gr
import os

# Retrieve the token from the environment; calling .strip() on a missing
# variable would raise AttributeError, so guard against None (None makes
# from_pretrained fall back to any cached `huggingface-cli login` token)
api_token = os.getenv("HF_TOKEN")
api_token = api_token.strip() if api_token else None

# Configure 4-bit quantization for efficient memory usage
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# Load the model and tokenizer with the required arguments
# (attn_implementation="flash_attention_2" requires the flash-attn package;
# drop the argument to fall back to the default attention implementation)
model = AutoModel.from_pretrained(
    "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1",
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
    attn_implementation="flash_attention_2",
    token=api_token,  # Authenticate with your Hugging Face token
)
tokenizer = AutoTokenizer.from_pretrained(
    "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1",
    trust_remote_code=True,
    token=api_token,  # Authenticate with your Hugging Face token
)

# Handle an input image and question
def process_query(image, question):
    try:
        if image is None:
            return "Error: please upload an image."

        # Convert the image to RGB format
        image = image.convert("RGB")

        # Construct the input message in the format the model's custom
        # chat method expects (image and question in one user turn)
        msgs = [{"role": "user", "content": [image, question]}]

        # Generate a response using the model's trust_remote_code chat API
        res = model.chat(
            image=image,
            msgs=msgs,
            tokenizer=tokenizer,
            sampling=True,
            temperature=0.95,
            stream=True,
        )

        # Collect the streamed response chunks into one string
        generated_text = ""
        for new_text in res:
            generated_text += new_text
        return generated_text
    except Exception as e:
        return f"Error: {e}"

# Define the Gradio interface
iface = gr.Interface(
    fn=process_query,
    inputs=[
        gr.Image(type="pil", label="Upload an Image"),
        gr.Textbox(label="Enter a Question"),
    ],
    outputs="text",
    title="ContactDoctor Multimodal Medical Assistant",
    description="Upload an image (e.g., X-ray or skin condition) and ask a medical question.",
)

# Launch the Gradio app with a public link. Note: launch() has no
# enable_api argument -- the API is exposed by default (see show_api)
if __name__ == "__main__":
    iface.launch(share=True)
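
# --- Usage sketch (commented out; not part of the app) ---
# A minimal example of calling the launched app programmatically with
# gradio_client, since the API is enabled by default. The URL, image path,
# and question below are assumptions for illustration: use the local or
# share URL printed by launch(), and confirm the endpoint name on the
# app's "Use via API" page (api_name="/predict" is the default for a
# single-function Interface).
#
# from gradio_client import Client, handle_file
#
# client = Client("http://127.0.0.1:7860")   # assumed local URL
# answer = client.predict(
#     handle_file("chest_xray.png"),         # hypothetical image path
#     "Are there signs of pneumonia?",       # hypothetical question
#     api_name="/predict",
# )
# print(answer)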